Refactored Caching, Refactored BnfNodeParser, Introduced Sphinx
@@ -1,12 +1,11 @@
import copy
from dataclasses import dataclass

from core import builtin_helpers
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, DEFINITION_TYPE_BNF
from core.tokenizer import TokenKind, Tokenizer
from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensNode
from parsers.BaseParser import BaseParser, UnexpectedTokenErrorNode, ErrorNode
from core.concept import DEFINITION_TYPE_BNF
from core.tokenizer import Tokenizer
from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensNode, SourceCodeNode
from parsers.BaseParser import UnexpectedTokenErrorNode, ErrorNode

PARSERS = ["BnfNode", "SyaNode", "Python"]

@@ -141,7 +140,11 @@ class AtomConceptParserHelper:
self.unrecognized_tokens.fix_source()

# try to recognize concepts
nodes_sequences = self._get_lexer_nodes_from_unrecognized()
nodes_sequences = builtin_helpers.get_lexer_nodes_from_unrecognized(
self.context,
self.unrecognized_tokens,
PARSERS)

if nodes_sequences:
instances = [self]
for i in range(len(nodes_sequences) - 1):
@@ -152,7 +155,7 @@ class AtomConceptParserHelper:
for instance, node_sequence in zip(instances, nodes_sequences):
for node in node_sequence:
instance.sequence.append(node)
if isinstance(node, UnrecognizedTokensNode) or \
if isinstance(node, (UnrecognizedTokensNode, SourceCodeNode)) or \
hasattr(node, "unrecognized_tokens") and node.unrecognized_tokens:
instance.has_unrecognized = True
instance.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])
@@ -193,22 +196,22 @@ class AtomConceptParserHelper:
clone.has_unrecognized = self.has_unrecognized
return clone

def _get_lexer_nodes_from_unrecognized(self):
"""
Use the source of self.unrecognized_tokens to find concepts or source code
:return:
"""

res = builtin_helpers.parse_unrecognized(self.context, self.unrecognized_tokens.source, PARSERS)
only_parsers_results = builtin_helpers.only_parsers_results(self.context, res)

if not only_parsers_results.status:
return None

return builtin_helpers.get_lexer_nodes(
only_parsers_results.body.body,
self.unrecognized_tokens.start,
self.unrecognized_tokens.tokens)
# def _get_lexer_nodes_from_unrecognized(self):
# """
# Use the source of self.unrecognized_tokens to find concepts or source code
# :return:
# """
#
# res = builtin_helpers.parse_unrecognized(self.context, self.unrecognized_tokens.source, PARSERS)
# only_parsers_results = builtin_helpers.only_parsers_results(self.context, res)
#
# if not only_parsers_results.status:
# return None
#
# return builtin_helpers.get_lexer_nodes(
# only_parsers_results.body.body,
# self.unrecognized_tokens.start,
# self.unrecognized_tokens.tokens)


class AtomNodeParser(BaseNodeParser):
@@ -230,7 +233,6 @@ class AtomNodeParser(BaseNodeParser):

def __init__(self, **kwargs):
super().__init__("AtomNode", 50, **kwargs)
self.enabled = False

@staticmethod
def _is_eligible(concept):
@@ -239,7 +241,8 @@ class AtomNodeParser(BaseNodeParser):
:param concept:
:return:
"""
return len(concept.metadata.props) == 0 or concept.metadata.definition_type == DEFINITION_TYPE_BNF
# return len(concept.metadata.props) == 0 or concept.metadata.definition_type == DEFINITION_TYPE_BNF
return len(concept.metadata.variables) == 0 and concept.metadata.definition_type != DEFINITION_TYPE_BNF

def get_concepts_sequences(self):

@@ -255,6 +258,13 @@ class AtomNodeParser(BaseNodeParser):
concept_parser_helpers.extend(forked)
forked.clear()

def _get_concepts_by_name(name):
other_concepts = self.sheerka.get_by_name(name)
if isinstance(other_concepts, list):
return other_concepts

return [other_concepts] if self.sheerka.is_known(other_concepts) else []

concept_parser_helpers = [AtomConceptParserHelper(self.context)]

while self.next_token(False):
@@ -268,7 +278,7 @@ class AtomNodeParser(BaseNodeParser):
if concept_parser.eat_token(self.token, self.pos):
concept_parser.lock()

concepts = self.get_concepts(token, self._is_eligible)
concepts = self.get_concepts(token, self._is_eligible, custom=_get_concepts_by_name)
if not concepts:
for concept_parser in concept_parser_helpers:
concept_parser.eat_unrecognized(token, self.pos)

+195
-65
@@ -2,8 +2,9 @@ from collections import namedtuple
from dataclasses import dataclass
from enum import Enum

import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.concept import VARIABLE_PREFIX, Concept
from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF, ConceptParts
from core.sheerka.ExecutionContext import ExecutionContext
from core.tokenizer import TokenKind, LexerError, Token
from parsers.BaseParser import Node, BaseParser, ErrorNode
@@ -187,6 +188,9 @@ class SourceCodeNode(LexerNode):
self.end == other.end and \
self.source == other.source

if isinstance(other, SCN):
return other == self

if not isinstance(other, SourceCodeNode):
return False

@@ -352,6 +356,51 @@ class HelperWithPos:
return self


class SCN(HelperWithPos):
"""
SourceCodeNode tester class
It matches a SourceCodeNode but with fewer constraints

SCN == SourceCodeNode if source, start, end match (start and end are not validated when None)
"""
def __init__(self, source, start=None, end=None):
super().__init__(start, end)
self.source = source

def __eq__(self, other):
if id(self) == id(other):
return True

if isinstance(other, SourceCodeNode):
if self.source != other.source:
return False
if self.start is not None and self.start != other.start:
return False
if self.end is not None and self.end != other.end:
return False

return True

if not isinstance(other, CN):
return False

return self.source == other.source and \
self.start == other.start and \
self.end == other.end

def __hash__(self):
return hash((self.source, self.start, self.end))

def __repr__(self):
txt = f"SCN(source='{self.source}'"
if self.start is not None:
txt += f", start={self.start}"
if self.end is not None:
txt += f", end={self.end}"
return txt + ")"
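
A minimal sketch of the intended test usage of SCN; the SourceCodeNode argument order follows the call sites visible later in this diff, and all values are illustrative:

# Illustrative: SCN ignores start/end when they are left as None.
tokens = [...]  # tokens covering positions 3..5
node = SourceCodeNode(code_value, 3, 5, tokens, "1 + 2")  # (value, start, end, tokens, source)
assert SCN("1 + 2") == node                 # source only; positions not validated
assert SCN("1 + 2", start=3, end=5) == node
assert not (SCN("1 + 2", start=0) == node)  # start mismatch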


class CN(HelperWithPos):
"""
ConceptNode tester class
@@ -390,6 +439,8 @@ class CN(HelperWithPos):
return False
if self.end is not None and self.end != other.end:
return False
if self.source is not None and self.source != other.source:
return False
return True

if not isinstance(other, CN):
@@ -425,9 +476,10 @@ class CNC(CN):
CNC == ConceptNode if CNC.compiled == ConceptNode.concept.compiled
"""

def __init__(self, concept_key, start=None, end=None, source=None, **kwargs):
def __init__(self, concept_key, start=None, end=None, source=None, exclude_body=False, **kwargs):
super().__init__(concept_key, start, end, source)
self.compiled = kwargs
self.exclude_body = exclude_body

def __eq__(self, other):
if id(self) == id(other):
@@ -442,7 +494,13 @@ class CNC(CN):
return False
if self.end is not None and self.end != other.end:
return False
return self.compiled == other.concept.compiled # assert instead of return to help debugging tests
if self.source is not None and self.source != other.source:
return False
if self.exclude_body:
to_compare = {k: v for k, v in other.concept.compiled.items() if k != ConceptParts.BODY}
else:
to_compare = other.concept.compiled
return self.compiled == to_compare

if not isinstance(other, CNC):
return False
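
The new exclude_body flag makes CNC skip the compiled body when comparing against a ConceptNode. A hedged sketch of the intended test usage (the concept key and property name are illustrative):

# Illustrative: compare compiled properties but ignore ConceptParts.BODY.
tester = CNC("foo a", a=1, exclude_body=True)
assert tester == concept_node  # passes even when the compiled bodies differ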
@@ -518,11 +576,10 @@ class BaseNodeParser(BaseParser):
super().__init__(name, priority)
if 'sheerka' in kwargs:
sheerka = kwargs.get("sheerka")
self.init_from_sheerka(sheerka)
self.concepts_by_first_keyword = sheerka.resolved_concepts_by_first_keyword

else:
self.concepts_by_first_keyword = None
self.sya_definitions = None

self.token = None
self.pos = -1
@@ -532,17 +589,16 @@ class BaseNodeParser(BaseParser):
self.text = None
self.sheerka = None

def init_from_sheerka(self, sheerka):
def init_from_concepts(self, context, concepts, **kwargs):
"""
Use the definitions from Sheerka to initialize
:param sheerka:
Initialize the parser with a list of concepts
For unit test convenience
:param context:
:param concepts:
:return:
"""
self.concepts_by_first_keyword = sheerka.concepts_by_first_keyword
if sheerka.sya_definitions:
self.sya_definitions = {}
for k, v in sheerka.sya_definitions.items():
self.sya_definitions[k] = (v[0], SyaAssociativity(v[1]))
concepts_by_first_keyword = self.get_concepts_by_first_keyword(context, concepts).body
self.concepts_by_first_keyword = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword).body

def reset_parser(self, context, text):
self.context = context
@@ -582,82 +638,43 @@ class BaseNodeParser(BaseParser):

return self.token.type != TokenKind.EOF

def initialize(self, context, concepts, sya_definitions=None, use_sheerka=False):
"""
To quickly find a concept, we store them in a hash where the key is the first token of the concept
example :
Concept("foo a").def_prop("a"), "foo" is a token, "a" is a variable
So the key to use will be "foo"

Concept("a foo").def_prop("a") -> first token is "foo"

Concept("Hello my dear a").def_prop("a") -> first token is "Hello"
Note that under the same key, there will be multiple entries
a B-Tree may be a better implementation in the future

We also store sya_definition, which is a tuple (concept_precedence:int, concept_associativity:SyaAssociativity)
:param context:
:param concepts: list[Concept]
:param sya_definitions: hash[concept_id, tuple(precedence:int, associativity:SyaAssociativity)]
:param use_sheerka: first init with the definitions from Sheerka
:return:
"""
self.context = context
self.sheerka = context.sheerka

if use_sheerka:
self.init_from_sheerka(self.sheerka)

if sya_definitions:
if self.sya_definitions:
self.sya_definitions.update(sya_definitions)
else:
self.sya_definitions = sya_definitions

if self.concepts_by_first_keyword is None:
self.concepts_by_first_keyword = {}

for concept in concepts:
keywords = concept.key.split()
for keyword in keywords:
if keyword.startswith(VARIABLE_PREFIX):
continue

self.concepts_by_first_keyword.setdefault(keyword, []).append(concept.id)
break

return self.sheerka.ret(self.name, True, self.concepts_by_first_keyword)
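
To make the docstring above concrete, this is the shape of the index that initialize() builds for its three example concepts (the ids are invented for illustration):

# Illustrative contents of concepts_by_first_keyword:
#   Concept("foo a")           -> first keyword "foo"
#   Concept("a foo")           -> first keyword "foo" (the variable "a" is skipped)
#   Concept("Hello my dear a") -> first keyword "Hello"
{"foo": ["id-1", "id-2"], "Hello": ["id-3"]}  # several concepts can share a key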

def get_concepts(self, token, to_keep, to_map=None):
def get_concepts(self, token, to_keep, custom=None, to_map=None, strip_quotes=False):
"""
Tries to find if there are concepts that match the value of the token
:param token:
:param to_keep: predicate to tell if the concept is eligible
:param custom: lambda name -> List[Concept] that returns extra concepts for the given name
:param to_map:
:param strip_quotes: Remove quotes from strings
:return:
"""

if token.type == TokenKind.WHITESPACE:
return None

if token.type == TokenKind.STRING:
name = token.value[1:-1]
name = token.value[1:-1] if strip_quotes else token.value
elif token.type == TokenKind.KEYWORD:
name = token.value.value
else:
name = token.value

custom_concepts = custom(name) if custom else []

result = []
if name in self.concepts_by_first_keyword:
for concept_id in self.concepts_by_first_keyword[name]:
for concept_id in self.concepts_by_first_keyword.get(name):

concept = self.sheerka.get_by_id(concept_id)

if not to_keep(concept):
continue

concept = to_map(concept) if to_map else concept
concept = to_map(self, concept) if to_map else concept
result.append(concept)
return result
return result + custom_concepts

return None
return custom_concepts if custom else None
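
A sketch of how the new custom hook composes with the keyword index; the by-name lookup mirrors _get_concepts_by_name in the AtomNodeParser hunk above, everything else is illustrative:

# Illustrative: supply extra concepts by exact name when the
# first-keyword index has nothing for the token.
def by_name(name):
    found = sheerka.get_by_name(name)  # may return one concept or a list
    return found if isinstance(found, list) else [found]

concepts = parser.get_concepts(token, to_keep=lambda c: True, custom=by_name)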

@staticmethod
def get_token_value(token):
@@ -667,3 +684,116 @@ class BaseNodeParser(BaseParser):
return token.value.value
else:
return token.value

@staticmethod
def get_concepts_by_first_keyword(context, concepts, use_sheerka=False):
"""
Create the map describing the first token expected by a concept
:param context:
:param concepts: lists of concepts to parse
:param use_sheerka: if True, update concepts_by_first_keyword from sheerka
:return:
"""
sheerka = context.sheerka
res = sheerka.cache_manager.copy(sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY) if use_sheerka else {}
for concept in concepts:
keywords = BaseNodeParser.get_first_tokens(sheerka, concept)

if keywords is None:
# no first token found for a concept ?
return sheerka.ret(sheerka.name, False, concept)

for keyword in keywords:
res.setdefault(keyword, []).append(concept.id)

return sheerka.ret("BaseNodeParser", True, res)

@staticmethod
def resolve_concepts_by_first_keyword(context, concepts_by_first_keyword):
sheerka = context.sheerka

def _make_unique(elements):
keys = {}
for e in elements:
keys[e] = 1
return list(keys.keys())

def _resolve_concepts(concept_str):
resolved = []
to_resolve = []
concept = sheerka.get_by_id(core.utils.unstr_concept(concept_str)[1])
if sheerka.isaset(context, concept):
concepts = sheerka.get_set_elements(context, concept)
else:
concepts = [concept]

for concept in concepts:
BaseNodeParser.ensure_bnf(context, concept) # need to make sure that it cannot fail
keywords = BaseNodeParser.get_first_tokens(sheerka, concept)
for keyword in keywords:
(to_resolve if keyword.startswith("c:|") else resolved).append(keyword)

for concept_to_resolve_str in to_resolve:
resolved += _resolve_concepts(concept_to_resolve_str)

return resolved

res = {}
for k, v in concepts_by_first_keyword.items():
if k.startswith("c:|"):
resolved_keywords = _resolve_concepts(k)
for resolved in resolved_keywords:
res.setdefault(resolved, []).extend(v)
else:
res.setdefault(k, []).extend(v)

# 'uniquify' the lists
for k, v in res.items():
res[k] = _make_unique(v)

return sheerka.ret("BaseNodeParser", True, res)

@staticmethod
def resolve_sya_associativity_and_precedence(context, sya):
pass

@staticmethod
def get_first_tokens(sheerka, concept):
"""

:param sheerka:
:param concept:
:return:
"""
if concept.bnf:
from parsers.BnfNodeParser import BnfNodeFirstTokenVisitor
bnf_visitor = BnfNodeFirstTokenVisitor(sheerka)
bnf_visitor.visit(concept.bnf)
return bnf_visitor.first_tokens
else:
keywords = concept.key.split()
for keyword in keywords:
if keyword.startswith(VARIABLE_PREFIX):
continue

return [keyword]

return None

@staticmethod
def ensure_bnf(context, concept, parser_name="BaseNodeParser"):
if concept.metadata.definition_type == DEFINITION_TYPE_BNF and not concept.bnf:
from parsers.BnfParser import BnfParser
regex_parser = BnfParser()
desc = f"Resolving BNF {concept.metadata.definition}"
with context.push(parser_name, obj=concept, desc=desc) as sub_context:
sub_context.add_inputs(parser_input=concept.metadata.definition)
bnf_parsing_ret_val = regex_parser.parse(sub_context, concept.metadata.definition)
sub_context.add_values(return_values=bnf_parsing_ret_val)

if not bnf_parsing_ret_val.status:
raise Exception(bnf_parsing_ret_val.value)

concept.bnf = bnf_parsing_ret_val.body.body
if concept.id:
context.sheerka.get_by_id(concept.id).bnf = concept.bnf # update bnf in cache
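
ensure_bnf resolves a concept's BNF lazily on first use and writes the result back into the cache, so later lookups skip the parse. The typical call order, as used by _resolve_concepts above:

# Resolve the BNF (no-op if already resolved), then walk it.
BaseNodeParser.ensure_bnf(context, concept)
first_tokens = BaseNodeParser.get_first_tokens(context.sheerka, concept)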

+510
-432
File diff suppressed because it is too large
@@ -6,7 +6,7 @@ from core.sheerka.Sheerka import ExecutionContext
from core.tokenizer import Tokenizer, Token, TokenKind, LexerError
from parsers.BaseParser import BaseParser, ErrorNode, UnexpectedTokenErrorNode
from parsers.BnfNodeParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptExpression, \
StrMatch, ConceptGroupExpression
StrMatch


@dataclass()
@@ -234,8 +234,9 @@ class BnfParser(BaseParser):
if token.type == TokenKind.CONCEPT:
self.next_token()
concept = self.sheerka.new((token.value[0], token.value[1]))
expr = ConceptGroupExpression(concept) if self.sheerka.isaset(self.context, concept) \
else ConceptExpression(concept)
expr = ConceptExpression(concept)
# expr = ConceptGroupExpression(concept) if self.sheerka.isaset(self.context, concept) \
# else ConceptExpression(concept)
return self.eat_rule_name_if_needed(expr)

if token.type == TokenKind.IDENTIFIER:
@@ -259,8 +260,7 @@ class BnfParser(BaseParser):
body=("key", concept_name)))
return None
else:
expr = ConceptGroupExpression(concept) if self.sheerka.isaset(self.context, concept) \
else ConceptExpression(concept)
expr = ConceptExpression(concept)
expr.rule_name = concept.name
return self.eat_rule_name_if_needed(expr)

@@ -1,109 +0,0 @@
# try to match something like
# ConceptNode 'plus' ConceptNode
#
# Replaced by SyaNodeParser
from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import TokenKind, Token
from parsers.BaseNodeParser import SourceCodeNode
from parsers.BaseParser import BaseParser
from parsers.BnfNodeParser import ConceptNode, UnrecognizedTokensNode
from parsers.MultipleConceptsParser import MultipleConceptsParser
from core.concept import VARIABLE_PREFIX

multiple_concepts_parser = MultipleConceptsParser()


class ConceptsWithConceptsParser(BaseParser):
def __init__(self, **kwargs):
super().__init__("ConceptsWithConcepts", 25)
self.enabled = False

@staticmethod
def get_tokens(nodes):
tokens = []

for node in nodes:
if isinstance(node, ConceptNode):
index, line, column = node.tokens[0].index, node.tokens[0].line, node.tokens[0].column
tokens.append(Token(TokenKind.CONCEPT, node.concept, index, line, column))
else:
for token in node.tokens:
if token.type == TokenKind.EOF:
break
elif token.type in (TokenKind.NEWLINE, TokenKind.WHITESPACE):
continue
else:
tokens.append(token)

return tokens

@staticmethod
def get_key(nodes):
key = ""
index = 0
for node in nodes:
if key:
key += " "

if isinstance(node, UnrecognizedTokensNode):
key += node.source.strip()
else:
key += f"{VARIABLE_PREFIX}{index}"
index += 1

return key

def finalize_concept(self, context, concept, nodes):
index = 0
for node in nodes:

if isinstance(node, ConceptNode):
prop_name = list(concept.props.keys())[index]
concept.compiled[prop_name] = node.concept
context.log(
f"Setting property '{prop_name}'='{node.concept}'.",
self.name)
index += 1
elif isinstance(node, SourceCodeNode):
prop_name = list(concept.props.keys())[index]
sheerka = context.sheerka
value = sheerka.new(BuiltinConcepts.PARSER_RESULT, parser=self, source=node.source, body=node.node)
concept.compiled[prop_name] = [context.sheerka.ret(self.name, True, value)]
context.log(
f"Setting property '{prop_name}'='Python({node.source})'.",
self.name)
index += 1

return concept

def parse(self, context, parser_input):
sheerka = context.sheerka
nodes = self.get_input_as_lexer_nodes(parser_input, multiple_concepts_parser)
if not nodes:
return None

concept_key = self.get_key(nodes)
concept = sheerka.new(concept_key)
if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
return sheerka.ret(
self.name,
False,
sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.body))

concepts = concept if hasattr(concept, "__iter__") else [concept]
for concept in concepts:
self.finalize_concept(context, concept, nodes)

res = []
for concept in concepts:
res.append(sheerka.ret(
self.name,
True,
sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=parser_input.source,
body=concept,
try_parsed=None)))

return res[0] if len(res) == 1 else res
@@ -384,7 +384,8 @@ class DefaultParser(BaseParser):
return None, NotInitializedNode()

regex_parser = BnfParser()
with self.context.push(self.name, obj=current_concept_def) as sub_context:
desc = f"Resolving BNF {current_concept_def.definition}"
with self.context.push(self.name, obj=current_concept_def, desc=desc) as sub_context:
parsing_result = regex_parser.parse(sub_context, tokens)
sub_context.add_values(return_values=parsing_result)

@@ -1,9 +1,9 @@
import logging

from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts
from parsers.BaseParser import BaseParser
from core.tokenizer import Tokenizer, Keywords, TokenKind, LexerError
from core.concept import VARIABLE_PREFIX
from core.tokenizer import Keywords, TokenKind, LexerError
from parsers.BaseParser import BaseParser


class ExactConceptParser(BaseParser):
@@ -11,10 +11,11 @@ class ExactConceptParser(BaseParser):
Tries to recognize a single concept
"""

MAX_WORDS_SIZE = 10
MAX_WORDS_SIZE = 3

def __init__(self, **kwargs):
def __init__(self, max_word_size=None, **kwargs):
BaseParser.__init__(self, "ExactConcept", 80)
self.max_word_size = max_word_size

def parse(self, context, parser_input):
"""
@@ -33,11 +34,11 @@ class ExactConceptParser(BaseParser):
context.log(f"Error found in tokenizer {e}", self.name)
return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.ERROR, body=e))

if len(words) > self.MAX_WORDS_SIZE:
if len(words) > (self.max_word_size or self.MAX_WORDS_SIZE):
context.log(f"Max words reached. Stopping.", self.name)
return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, body=parser_input))
recognized = False
recognized = []  # keep track of the concepts found
for combination in self.combinations(words):

concept_key = " ".join(combination)
@@ -49,16 +50,23 @@ class ExactConceptParser(BaseParser):
concepts = result if isinstance(result, list) else [result]

for concept in concepts:
if concept.id in recognized:
context.log(f"Recognized concept {concept} again. Skipping.", self.name)
# example:
# if the input is 'foo a' and a concept is defined as 'foo a',
# there will be two matches, one for 'foo a' and one for 'foo _var_0',
# but it's the same concept 'foo a'
continue
context.log(f"Recognized concept {concept}.", self.name)
|
||||
# update the properties if needed
|
||||
need_validation = False
|
||||
for i, token in enumerate(combination):
|
||||
if token.startswith(VARIABLE_PREFIX):
|
||||
index = int(token[len(VARIABLE_PREFIX):])
|
||||
concept.def_prop_by_index(index, words[i])
|
||||
concept.def_var_by_index(index, words[i])
|
||||
concept.metadata.need_validation = True
|
||||
if self.verbose_log.isEnabledFor(logging.DEBUG):
|
||||
prop_name = list(concept.props.keys())[index]
|
||||
prop_name = concept.metadata.variables[index][0]
|
||||
context.log(
|
||||
f"Added property {index}: {prop_name}='{words[i]}'.",
|
||||
self.name)
|
||||
@@ -69,12 +77,13 @@ class ExactConceptParser(BaseParser):
|
||||
context.sheerka.new(
|
||||
BuiltinConcepts.PARSER_RESULT,
|
||||
parser=self,
|
||||
source=parser_input if isinstance(parser_input, str) else self.get_text_from_tokens(parser_input),
|
||||
source=parser_input if isinstance(parser_input, str) else self.get_text_from_tokens(
|
||||
parser_input),
|
||||
body=concept,
|
||||
try_parsed=concept)))
|
||||
recognized = True
|
||||
recognized.append(concept.id)
|
||||
|
||||
if recognized:
|
||||
if len(recognized) > 0:
|
||||
if len(res) == 1:
|
||||
self.log_result(context, parser_input, res[0])
|
||||
else:
|
||||
|
||||
@@ -318,7 +318,7 @@ class ExplainParser(BaseSplitIterParser):

def parse(self, context, parser_input):
"""
text can be a string, but it can also be a list of tokens
parser_input can be a string, but it can also be a list of tokens
:param context:
:param parser_input:
:return:
@@ -1,163 +0,0 @@
# to be replaced by SyaNodeParser
import ast

from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import TokenKind
from parsers.BaseNodeParser import SourceCodeNode
from parsers.BaseParser import BaseParser
from parsers.BnfNodeParser import BnfNodeParser, UnrecognizedTokensNode, ConceptNode
import core.utils
from parsers.PythonParser import PythonParser

concept_lexer_parser = BnfNodeParser()

class MultipleConceptsParser(BaseParser):
"""
Parser that will take the result of BnfNodeParser and
try to resolve the unrecognized tokens token by token

It is a success when it returns a list of ConceptNode exclusively
"""
def __init__(self, **kwargs):
BaseParser.__init__(self, "MultipleConcepts", 45)
self.enabled = False

@staticmethod
def finalize(nodes_found, unrecognized_tokens):
if not unrecognized_tokens:
return nodes_found, unrecognized_tokens

unrecognized_tokens.fix_source()
if unrecognized_tokens.not_whitespace():
nodes_found = core.utils.product(nodes_found, [unrecognized_tokens])

return nodes_found, None

@staticmethod
def create_or_add(unrecognized_tokens, token, index):
if unrecognized_tokens:
unrecognized_tokens.add_token(token, index)
else:
unrecognized_tokens = UnrecognizedTokensNode(index, index, [token])
return unrecognized_tokens

def parse(self, context, parser_input):
sheerka = context.sheerka
nodes = self.get_input_as_lexer_nodes(parser_input, concept_lexer_parser)
if not nodes:
return None

nodes_found = [[]]
concepts_only = True

for node in nodes:
if isinstance(node, UnrecognizedTokensNode):
unrecognized_tokens = None
i = 0

while i < len(node.tokens):

token_index = node.start + i
token = node.tokens[i]

concepts_nodes = self.get_concepts_nodes(context, token_index, token)
if concepts_nodes is not None:
nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
nodes_found = core.utils.product(nodes_found, concepts_nodes)
i += 1
continue

source_code_node = self.get_source_code_node(context, token_index, node.tokens[i:])
if source_code_node:
nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
nodes_found = core.utils.product(nodes_found, [source_code_node])
i += len(source_code_node.tokens)
continue

# not a concept nor some source code
unrecognized_tokens = self.create_or_add(unrecognized_tokens, token, token_index)
concepts_only &= token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE)
i += 1

# finish processing if needed
nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)

else:
nodes_found = core.utils.product(nodes_found, [node])

ret = []
for choice in nodes_found:
ret.append(
sheerka.ret(
self.name,
concepts_only,
sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=parser_input.source,
body=choice,
try_parsed=None))
)

if len(ret) == 1:
self.log_result(context, parser_input.source, ret[0])
return ret[0]
else:
self.log_multiple_results(context, parser_input.source, ret)
return ret

@staticmethod
def get_concepts_nodes(context, index, token):
"""
Tries to recognize a concept
from the universe of all known concepts
"""

if token.type != TokenKind.IDENTIFIER:
return None

concept = context.new_concept(token.value)
if hasattr(concept, "__iter__") or context.sheerka.is_known(concept):
concepts = concept if hasattr(concept, "__iter__") else [concept]
concepts_nodes = [ConceptNode(c, index, index, [token], token.value) for c in concepts]
return concepts_nodes

return None

@staticmethod
def get_source_code_node(context, index, tokens):
"""
Tries to recognize source code.
For the time being, only Python is supported
:param context:
:param tokens:
:param index:
:return:
"""

if len(tokens) == 0 or (len(tokens) == 1 and tokens[0].type == TokenKind.EOF):
return None

end_index = len(tokens)
while end_index > 0:
parser = PythonParser()
tokens_to_parse = tokens[:end_index]
res = parser.parse(context, tokens_to_parse)
if res.status:
# only expressions are accepted
ast_ = res.value.value.ast_
if not isinstance(ast_, ast.Expression):
return None
try:
compiled = compile(ast_, "<string>", "eval")
eval(compiled, {}, {})
except Exception:
return None

source = BaseParser.get_text_from_tokens(tokens_to_parse)
return SourceCodeNode(res.value.value, index, index + end_index - 1, tokens_to_parse, source)
end_index -= 1

return None
@@ -1,11 +1,11 @@
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
from core.tokenizer import Tokenizer, LexerError, TokenKind
from parsers.BaseParser import BaseParser, Node, ErrorNode
from dataclasses import dataclass
import ast
import logging
import core.utils
from dataclasses import dataclass

import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import LexerError, TokenKind
from parsers.BaseParser import BaseParser, Node, ErrorNode
from parsers.BnfNodeParser import ConceptNode

log = logging.getLogger(__name__)

@@ -1,7 +1,6 @@
from core.builtin_concepts import BuiltinConcepts
from parsers.BaseParser import BaseParser
from parsers.BnfNodeParser import ConceptNode
from parsers.MultipleConceptsParser import MultipleConceptsParser
from parsers.PythonParser import PythonParser
from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser

+166
-118
@@ -1,4 +1,3 @@
import copy
from collections import namedtuple
from dataclasses import dataclass, field
from typing import List
@@ -7,10 +6,10 @@ from core import builtin_helpers
from core.builtin_concepts import BuiltinConcepts
from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF
from core.sheerka.ExecutionContext import ExecutionContext
from core.tokenizer import LexerError, Token, TokenKind
from core.tokenizer import Token, TokenKind
from parsers.BaseNodeParser import UnrecognizedTokensNode, ConceptNode, SourceCodeNode, SyaAssociativity, \
SourceCodeWithConceptNode
from parsers.BaseParser import BaseParser, ErrorNode
SourceCodeWithConceptNode, BaseNodeParser
from parsers.BaseParser import ErrorNode, UnexpectedTokenErrorNode

PARSERS = ["BnfNode", "AtomNode", "Python"]

@@ -116,13 +115,13 @@ class SyaConceptParserHelper:
return len(self.expected) == 0

def is_atom(self):
return len(self.concept.concept.metadata.props) == 0 and len(self.expected) == 0
return len(self.concept.concept.metadata.variables) == 0 and len(self.expected) == 0

def is_expected(self, token):
if self.is_matched():
return False

token_value = self._get_token_value(token)
token_value = BaseNodeParser.get_token_value(token)

for expected in self.expected:
if not expected.startswith(VARIABLE_PREFIX) and expected == token_value:
@@ -139,7 +138,7 @@ class SyaConceptParserHelper:

# return True if a whole sequence of keywords is eaten
# example
# Concept("foo a bar baz qux b").def_prop("a").def_prop("b")
# Concept("foo a bar baz qux b").def_var("a").def_var("b")
# 'bar' is just eaten. We will return False because 'baz' and 'qux' are still waiting
if len(self.expected) == 0:
return True
@@ -169,14 +168,14 @@ class SyaConceptParserHelper:
self.concept = self.concept.concept
return self

@staticmethod
def _get_token_value(token):
if token.type == TokenKind.STRING:
return token.value[1:-1]
elif token.type == TokenKind.KEYWORD:
return token.value.value
else:
return token.value
# @staticmethod
# def _get_token_value(token):
# if token.type == TokenKind.STRING:
# return token.value[1:-1]
# elif token.type == TokenKind.KEYWORD:
# return token.value.value
# else:
# return token.value

def clone(self):
clone = SyaConceptParserHelper(self.concept, self.start, self.end)
@@ -215,7 +214,10 @@ class InFixToPostFix:
if not isinstance(other, InFixToPostFix):
return False

return self.out == other.out
return self.out == other.out and self.errors == other.errors

def __hash__(self):
return len(self.sequence) + len(self.errors)

def _add_error(self, error):
self.errors.append(error)
@@ -396,6 +398,7 @@ class InFixToPostFix:
del current_concept.expected[0]

def manage_unrecognized(self):

if self.unrecognized_tokens.is_empty():
return

@@ -514,10 +517,10 @@ class InFixToPostFix:
def handle_expected_token(self, token, pos):
"""
True if the token is part of the concept being parsed and the last token in a sequence is eaten
Example : Concept("foo a bar b").def_prop("a").def_prop("b")
Example : Concept("foo a bar b").def_var("a").def_var("b")
The expected tokens are 'foo' and 'bar' (as a and b are parameters)

Example: Concept("foo a bar baz b").def_prop("a").def_prop("b")
Example: Concept("foo a bar baz b").def_var("a").def_var("b")
If the token is 'bar', it will be eaten but handle_expected_token() will return False
as we still expect 'baz'
:param token:
@@ -565,6 +568,18 @@ class InFixToPostFix:

return True

# else:
# if token.type != TokenKind.WHITESPACE:
# # hack, because whitespaces are not correctly parsed in self.expected
# # KSI 2020/04/25
# # I no longer understand why we are in a loop (the reverse one)
# # if we are parsing a concept and the expected token does not match
# # The whole class should be in error
# self._add_error(UnexpectedTokenErrorNode(
# f"Failed to parse '{current_concept.concept.concept}'",
# token, current_concept.expected))
# return False

return False

def eat_token(self, token, pos):
@@ -581,7 +596,7 @@ class InFixToPostFix:

if self.handle_expected_token(token, pos):
# a token is found, let's check if it's part of a concept being parsed
# example Concept(name="foo", definition="foo a bar b").def_prop("a").def_prop("b")
# example Concept(name="foo", definition="foo a bar b").def_var("a").def_var("b")
# if the token 'bar' is found, it has to be considered as part of the concept foo
self.debug.append(token)
return True
@@ -780,16 +795,13 @@ class PostFixToItem:
has_unrecognized: bool


class SyaNodeParser(BaseParser):
class SyaNodeParser(BaseNodeParser):

def __init__(self, **kwargs):
BaseParser.__init__(self, "SyaNode", 50)
super().__init__("SyaNode", 50, **kwargs)
if 'sheerka' in kwargs:
sheerka = kwargs.get("sheerka")
self.concepts_by_first_keyword = sheerka.concepts_by_first_keyword
self.sya_definitions = {}
if sheerka.sya_definitions:
for k, v in sheerka.sya_definitions.items():
self.sya_definitions[k] = (v[0], SyaAssociativity(v[1]))
self.sya_definitions = sheerka.resolved_sya_def

else:
self.concepts_by_first_keyword = {}
@@ -803,104 +815,133 @@ class SyaNodeParser(BaseParser):
self.text = None
self.sheerka = None

def reset_parser(self, context, text):
self.context = context
self.sheerka = context.sheerka
self.text = text

try:
self.tokens = list(self.get_input_as_tokens(text))
except LexerError as e:
self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
return False

self.token = None
self.pos = -1
return True

def add_error(self, error, next_token=True):
self.error_sink.append(error)
if next_token:
self.next_token()
return error

def get_token(self) -> Token:
return self.token

def next_token(self, skip_whitespace=True):
if self.token and self.token.type == TokenKind.EOF:
return False

self.pos += 1
self.token = self.tokens[self.pos]

if skip_whitespace:
while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
self.pos += 1
self.token = self.tokens[self.pos]

return self.token.type != TokenKind.EOF

def initialize(self, context, concepts=None, sya_definitions=None):
self.context = context
self.sheerka = context.sheerka
def init_from_concepts(self, context, concepts, **kwargs):
super().init_from_concepts(context, concepts)

sya_definitions = kwargs.get("sya", None)
if sya_definitions:
self.sya_definitions = sya_definitions

if concepts:
for concept in concepts:
keywords = concept.key.split()
for keyword in keywords:
if keyword.startswith(VARIABLE_PREFIX):
continue

self.concepts_by_first_keyword.setdefault(keyword, []).append(concept.id)
break

return self.sheerka.ret(self.name, True, self.concepts_by_first_keyword)
def get_concepts(self, token):
@staticmethod
def _is_eligible(concept):
"""
Tries to find if there are concepts that match the value of the token
:param token:
Predicate that selects the concepts to be handled by this parser
:param concept:
:return:
"""
# We only keep concepts that have parameters (refuse atoms)
# Bnf definitions are not supposed to be managed by this parser either
return len(concept.metadata.variables) > 0 and concept.metadata.definition_type != DEFINITION_TYPE_BNF

if token.type == TokenKind.STRING:
name = token.value[1:-1]
elif token.type == TokenKind.KEYWORD:
name = token.value.value
else:
name = token.value
@staticmethod
def _get_sya_concept_def(parser, concept):
sya_concept_def = SyaConceptDef(concept)
if concept.id in parser.sya_definitions:
sya_def = parser.sya_definitions.get(concept.id)
if sya_def[0] is not None:
sya_concept_def.precedence = sya_def[0]
if sya_def[1] is not None:
sya_concept_def.associativity = sya_def[1]
return sya_concept_def
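
_get_sya_concept_def overlays any registered shunting-yard definition (precedence, associativity) onto the default SyaConceptDef; None entries leave the defaults untouched. A hedged sketch, where the 'plus' concept and the SyaAssociativity member name are assumptions for illustration:

# Illustrative: a 'plus' concept registered with precedence 10.
parser.sya_definitions = {plus.id: (10, SyaAssociativity.LEFT)}  # LEFT assumed to exist
sya_def = SyaNodeParser._get_sya_concept_def(parser, plus)
assert sya_def.precedence == 10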

result = []
if name in self.concepts_by_first_keyword:
for concept_id in self.concepts_by_first_keyword[name]:
# def reset_parser(self, context, text):
# self.context = context
# self.sheerka = context.sheerka
# self.text = text
#
# try:
# self.tokens = list(self.get_input_as_tokens(text))
# except LexerError as e:
# self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
# return False
#
# self.token = None
# self.pos = -1
# return True
#
# def add_error(self, error, next_token=True):
# self.error_sink.append(error)
# if next_token:
# self.next_token()
# return error
#
# def get_token(self) -> Token:
# return self.token
#
# def next_token(self, skip_whitespace=True):
# if self.token and self.token.type == TokenKind.EOF:
# return False
#
# self.pos += 1
# self.token = self.tokens[self.pos]
#
# if skip_whitespace:
# while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
# self.pos += 1
# self.token = self.tokens[self.pos]
#
# return self.token.type != TokenKind.EOF

concept = self.sheerka.get_by_id(concept_id)

if len(concept.metadata.props) == 0:
# only concepts that have parameters (refuse atoms)
# Note that this test is needed if the definition of the concept has changed
continue

if concept.metadata.definition_type == DEFINITION_TYPE_BNF:
# bnf definitions are not supposed to be managed by this parser
continue

sya_concept_def = SyaConceptDef(concept)
if concept.id in self.sya_definitions:
sya_def = self.sya_definitions[concept.id]
if sya_def[0] is not None:
sya_concept_def.precedence = sya_def[0]
if sya_def[1] is not None:
sya_concept_def.associativity = sya_def[1]

result.append(sya_concept_def)
return result

return None
# def initialize(self, context, concepts=None, sya_definitions=None):
# self.context = context
# self.sheerka = context.sheerka
#
# if sya_definitions:
# self.sya_definitions = sya_definitions
#
# if concepts:
# for concept in concepts:
# keywords = concept.key.split()
# for keyword in keywords:
# if keyword.startswith(VARIABLE_PREFIX):
# continue
#
# self.concepts_by_first_keyword.setdefault(keyword, []).append(concept.id)
# break
#
# return self.sheerka.ret(self.name, True, self.concepts_by_first_keyword)
#
# def get_concepts(self, token):
# """
# Tries to find if there are concepts that match the value of the token
# :param token:
# :return:
# """
#
# if token.type == TokenKind.STRING:
# name = token.value[1:-1]
# elif token.type == TokenKind.KEYWORD:
# name = token.value.value
# else:
# name = token.value
#
# result = []
# if name in self.concepts_by_first_keyword:
# for concept_id in self.concepts_by_first_keyword[name]:
#
# concept = self.sheerka.get_by_id(concept_id)
#
# if len(concept.metadata.props) == 0:
# # only concepts that have parameters (refuse atoms)
# # Note that this test is needed if the definition of the concept has changed
# continue
#
# if concept.metadata.definition_type == DEFINITION_TYPE_BNF:
# # bnf definitions are not supposed to be managed by this parser
# continue
#
# sya_concept_def = SyaConceptDef(concept)
# if concept.id in self.sya_definitions:
# sya_def = self.sya_definitions[concept.id]
# if sya_def[0] is not None:
# sya_concept_def.precedence = sya_def[0]
# if sya_def[1] is not None:
# sya_concept_def.associativity = sya_def[1]
#
# result.append(sya_concept_def)
# return result
#
# return None

def infix_to_postfix(self, context, text):
"""
@@ -943,7 +984,7 @@ class SyaNodeParser(BaseParser):
if infix_to_postfix.eat_token(token, self.pos):
infix_to_postfix.lock()

concepts = self.get_concepts(token)
concepts = self.get_concepts(token, self._is_eligible, to_map=self._get_sya_concept_def)
if not concepts:
for infix_to_postfix in res:
infix_to_postfix.eat_unrecognized(token, self.pos)
@@ -988,7 +1029,7 @@ class SyaNodeParser(BaseParser):
else:
items.append(res)
item.has_unrecognized |= hasattr(res, "has_unrecognized") and res.has_unrecognized or \
isinstance(res, UnrecognizedTokensNode)
isinstance(res, UnrecognizedTokensNode)
item.nodes = items
item.fix_all_pos()
item.tokens = self.tokens[item.start:item.end + 1]
@@ -1000,7 +1041,7 @@ class SyaNodeParser(BaseParser):
end = item.end
has_unrecognized = False
concept = sheerka.new_from_template(item.concept, item.concept.id)
for param_index in reversed(range(len(concept.metadata.props))):
for param_index in reversed(range(len(concept.metadata.variables))):
inner_item = self.postfix_to_item(sheerka, postfixed)
if inner_item.start < start:
start = inner_item.start
@@ -1008,7 +1049,7 @@ class SyaNodeParser(BaseParser):
end = inner_item.end
has_unrecognized |= isinstance(inner_item, UnrecognizedTokensNode)

param_name = concept.metadata.props[param_index][0]
param_name = concept.metadata.variables[param_index][0]
param_value = inner_item.concept if hasattr(inner_item, "concept") else \
[inner_item.return_value] if isinstance(inner_item, SourceCodeNode) else \
inner_item
@@ -1115,3 +1156,10 @@ class SyaNodeParser(BaseParser):
result.append(infix_to_postfix)

return result

# @staticmethod
# def init_sheerka(self, sheerka):
# if hasattr(BaseNodeParser, "init_sheerka"):
# BaseNodeParser.init_sheerka(sheerka)
#
# # init sya definitions
@@ -52,11 +52,20 @@ class UnrecognizedNodeParser(BaseParser):
res = only_successful(context, res)
if res.status:
lexer_nodes = get_lexer_nodes(res.body.body, node.start, node.tokens)
sequences_found = core.utils.product(sequences_found, lexer_nodes)
if lexer_nodes:
# make sure lexer_nodes is not empty (for example, some Python results are discarded)
sequences_found = core.utils.product(sequences_found, lexer_nodes)
else:
sequences_found = core.utils.product(sequences_found, [node])
has_unrecognized = True
else:
sequences_found = core.utils.product(sequences_found, [node])
has_unrecognized = True

elif isinstance(node, SourceCodeNode):
sequences_found = core.utils.product(sequences_found, [node])
has_unrecognized = True # never trust source code. It may be invalid source code

else: # cannot happen as of today :-)
raise NotImplementedError()

@@ -0,0 +1,912 @@
|
||||
# #####################################################################################################
|
||||
# # This implementation of the parser is highly inspired by the arpeggio project (https://github.com/textX/Arpeggio)
|
||||
# # I don't directly use the project, but it helped me figure out
|
||||
# # what to do.
|
||||
# # Dejanović I., Milosavljević G., Vaderna R.:
|
||||
# # Arpeggio: A flexible PEG parser for Python,
|
||||
# # Knowledge-Based Systems, 2016, 95, 71 - 74, doi:10.1016/j.knosys.2015.12.004
|
||||
# #####################################################################################################
|
||||
# from collections import namedtuple
|
||||
# from dataclasses import dataclass
|
||||
# from collections import defaultdict
|
||||
# from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
|
||||
# from core.concept import Concept, ConceptParts, DoNotResolve
|
||||
# from core.tokenizer import TokenKind, Tokenizer, Token
|
||||
# from parsers.BaseNodeParser import LexerNode, GrammarErrorNode, ConceptNode, UnrecognizedTokensNode
|
||||
# from parsers.BaseParser import BaseParser, ErrorNode
|
||||
# import core.utils
|
||||
#
|
||||
#
|
||||
# class NonTerminalNode(LexerNode):
|
||||
# """
|
||||
# Returned by the BnfNodeParser
|
||||
# """
|
||||
#
|
||||
# def __init__(self, parsing_expression, start, end, tokens, children=None):
|
||||
# super().__init__(start, end, tokens)
|
||||
# self.parsing_expression = parsing_expression
|
||||
# self.children = children
|
||||
#
|
||||
# def __repr__(self):
|
||||
# name = self.parsing_expression.rule_name or self.parsing_expression.__class__.__name__
|
||||
# if len(self.children) > 0:
|
||||
# sub_names = "(" + ",".join([repr(child) for child in self.children]) + ")"
|
||||
# else:
|
||||
# sub_names = ""
|
||||
# return name + sub_names
|
||||
#
|
||||
# def __eq__(self, other):
|
||||
# if not isinstance(other, NonTerminalNode):
|
||||
# return False
|
||||
#
|
||||
# return self.parsing_expression == other.parsing_expression and \
|
||||
# self.start == other.start and \
|
||||
# self.end == other.end and \
|
||||
# self.children == other.children
|
||||
#
|
||||
# def __hash__(self):
|
||||
# return hash((self.parsing_expression, self.start, self.end, self.children))
|
||||
#
|
||||
#
|
||||
# class TerminalNode(LexerNode):
|
||||
# """
|
||||
# Returned by the BnfNodeParser
|
||||
# """
|
||||
#
|
||||
# def __init__(self, parsing_expression, start, end, value):
|
||||
# super().__init__(start, end, source=value)
|
||||
# self.parsing_expression = parsing_expression
|
||||
# self.value = value
|
||||
#
|
||||
# def __repr__(self):
|
||||
# name = self.parsing_expression.rule_name or ""
|
||||
# return name + f"'{self.value}'"
|
||||
#
|
||||
# def __eq__(self, other):
|
||||
# if not isinstance(other, TerminalNode):
|
||||
# return False
|
||||
#
|
||||
# return self.parsing_expression == other.parsing_expression and \
|
||||
# self.start == other.start and \
|
||||
# self.end == other.end and \
|
||||
# self.value == other.value
|
||||
#
|
||||
# def __hash__(self):
|
||||
# return hash((self.parsing_expression, self.start, self.end, self.value))
|
||||
#
|
||||
#
|
||||
# @dataclass()
|
||||
# class UnknownConceptNode(ErrorNode):
|
||||
# concept_key: str
|
||||
#
|
||||
#
|
||||
# @dataclass()
|
||||
# class TooManyConceptNode(ErrorNode):
|
||||
# concept_key: str
|
||||
#
|
||||
#
|
||||
# class ParsingExpression:
|
||||
# def __init__(self, *args, **kwargs):
|
||||
# self.elements = args
|
||||
#
|
||||
# nodes = kwargs.get('nodes', [])
|
||||
# if not hasattr(nodes, '__iter__'):
|
||||
# nodes = [nodes]
|
||||
# self.nodes = nodes
|
||||
#
|
||||
# self.rule_name = kwargs.get('rule_name', '')
|
||||
#
|
||||
# def __eq__(self, other):
|
||||
# if not isinstance(other, ParsingExpression):
|
||||
# return False
|
||||
#
|
||||
# return self.rule_name == other.rule_name and self.elements == other.elements
|
||||
#
|
||||
# def __hash__(self):
|
||||
# return hash((self.rule_name, self.elements))
|
||||
#
|
||||
# def parse(self, parser):
|
||||
# return self._parse(parser)
|
||||
#
|
||||
# def add_rule_name_if_needed(self, text):
|
||||
# return text + "=" + self.rule_name if self.rule_name else text
|
||||
#
|
||||
#
# class ConceptExpression(ParsingExpression):
#     """
#     Will match a concept.
#     It is used only for rule definitions.
#
#     When the grammar is created, it is replaced by the actual concept.
#     """
#
#     def __init__(self, concept, rule_name=""):
#         super().__init__(rule_name=rule_name)
#         self.concept = concept
#
#     def __repr__(self):
#         return self.add_rule_name_if_needed(f"{self.concept}")
#
#     def __eq__(self, other):
#         if not super().__eq__(other):
#             return False
#
#         if not isinstance(other, ConceptExpression):
#             return False
#
#         if isinstance(self.concept, Concept):
#             return self.concept.name == other.concept.name
#
#         # when it's only the name of the concept
#         return self.concept == other.concept
#
#     def __hash__(self):
#         return hash((self.concept, self.rule_name))
#
#     @staticmethod
#     def get_parsing_expression_from_name(name):
#         tokens = Tokenizer(name)
#         nodes = [StrMatch(core.utils.strip_quotes(token.value)) for token in list(tokens)[:-1]]
#         if len(nodes) == 1:
#             return nodes[0]
#         else:
#             sequence = Sequence(nodes)
#             sequence.nodes = nodes
#             return sequence
#
#     def _parse(self, parser):
#         to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept
#         if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
#             return None
#
#         self.concept = to_match  # Memoize
#
#         if to_match not in parser.concepts_grammars:
#             # Try to match the concept using its name
#             expr = self.get_parsing_expression_from_name(to_match.name)
#             node = expr.parse(parser)
#         else:
#             node = parser.concepts_grammars[to_match].parse(parser)
#
#         if node is None:
#             return None
#
#         return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node])
#
#
# class ConceptGroupExpression(ConceptExpression):
#     def _parse(self, parser):
#         to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept
#         if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
#             return None
#
#         self.concept = to_match  # Memoize
#
#         if to_match not in parser.concepts_grammars:
#             concepts_in_group = parser.sheerka.get_set_elements(parser.context, self.concept)
#             nodes = [ConceptExpression(c, rule_name=c.name) for c in concepts_in_group]
#             expr = OrderedChoice(nodes)
#             expr.nodes = nodes
#             node = expr.parse(parser)
#         else:
#             node = parser.concepts_grammars[to_match].parse(parser)
#
#         if node is None:
#             return None
#
#         return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node])
#
#
# class Sequence(ParsingExpression):
#     """
#     Will match a sequence of parser expressions in the exact order they are defined.
#     """
#
#     def _parse(self, parser):
#         init_pos = parser.pos
#         end_pos = parser.pos
#
#         children = []
#         for e in self.nodes:
#             node = e.parse(parser)
#             if node is None:
#                 return None
#             else:
#                 if node.end != -1:  # because returns -1 when no match
#                     children.append(node)
#                     end_pos = node.end
#
#         return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)
#
#     def __repr__(self):
#         to_str = ", ".join(repr(n) for n in self.elements)
#         return self.add_rule_name_if_needed(f"({to_str})")
#
#
# class OrderedChoice(ParsingExpression):
#     """
#     Will match one expression among multiple.
#     It stops at the first match (so the order of definition is important).
#     """
#
#     def _parse(self, parser):
#         init_pos = parser.pos
#
#         for e in self.nodes:
#             node = e.parse(parser)
#             if node:
#                 return NonTerminalNode(self, init_pos, node.end, parser.tokens[init_pos: node.end + 1], [node])
#
#             parser.seek(init_pos)  # backtrack
#
#         return None
#
#     def __repr__(self):
#         to_str = "| ".join(repr(n) for n in self.elements)
#         return self.add_rule_name_if_needed(f"({to_str})")
#
#
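For reference, the semantics of Sequence and OrderedChoice above can be illustrated with a minimal standalone sketch (plain Python, independent of the commented-out classes; all names are illustrative):

# Sequence: all sub-parsers must match in order. OrderedChoice: first match
# wins and later alternatives are never tried; a failed alternative rewinds
# to the starting position before the next one is attempted.
def lit(word):
    def parse(tokens, pos):
        # match a single literal token, or fail with None
        if pos < len(tokens) and tokens[pos] == word:
            return pos + 1
        return None
    return parse

def seq(*parsers):
    def parse(tokens, pos):
        for p in parsers:
            pos = p(tokens, pos)
            if pos is None:
                return None
        return pos
    return parse

def ordered_choice(*parsers):
    def parse(tokens, pos):
        for p in parsers:
            new_pos = p(tokens, pos)
            if new_pos is not None:
                return new_pos
        return None  # implicit backtrack: pos was never advanced
    return parse

grammar = seq(lit("one"), ordered_choice(lit("two"), lit("three")))
print(grammar(["one", "three"], 0))  # -> 2 (second alternative matched)
print(grammar(["one", "four"], 0))   # -> None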
# class Optional(ParsingExpression):
#     """
#     Will match the elements, or nothing at all.
#     If several elements match, the longest match is chosen.
#     If you need ordered choice instead, use Optional(OrderedChoice(...)).
#     """
#
#     def _parse(self, parser):
#         init_pos = parser.pos
#         selected_node = NonTerminalNode(self, parser.pos, -1, [], [])  # means that nothing is found
#
#         for e in self.nodes:
#             node = e.parse(parser)
#             if node:
#                 if node.end > selected_node.end:
#                     selected_node = NonTerminalNode(
#                         self,
#                         node.start,
#                         node.end,
#                         parser.tokens[node.start: node.end + 1],
#                         [node])
#
#             parser.seek(init_pos)  # backtrack
#
#         if selected_node.end != -1:
#             parser.seek(selected_node.end)
#             parser.next_token()  # eat the tokens found
#
#         return selected_node
#
#     def __repr__(self):
#         if len(self.elements) == 1:
#             return f"{self.elements[0]}?"
#         else:
#             to_str = ", ".join(repr(n) for n in self.elements)
#             return self.add_rule_name_if_needed(f"({to_str})?")
#
#
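The longest-match rule used by Optional can be sketched on its own (a self-contained illustration, not the class above):

# Every alternative is tried from the same start position and the one
# consuming the most tokens wins; consuming nothing is always acceptable,
# so the combinator never fails.
def optional_longest(parsers, tokens, pos):
    best = pos
    for p in parsers:
        new_pos = p(tokens, pos)  # each parser returns a new position or None
        if new_pos is not None and new_pos > best:
            best = new_pos
    return best

match_a = lambda tokens, pos: pos + 1 if tokens[pos:pos + 1] == ["a"] else None
match_ab = lambda tokens, pos: pos + 2 if tokens[pos:pos + 2] == ["a", "b"] else None
print(optional_longest([match_a, match_ab], ["a", "b"], 0))  # -> 2 (longest wins)
print(optional_longest([match_a, match_ab], ["x"], 0))       # -> 0 (no match, no failure)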
# class Repetition(ParsingExpression):
#     """
#     Base class for all repetition-like parser expressions (?, *, +).
#
#     Args:
#         eolterm(bool): Flag that indicates that end of line should
#             terminate repetition match.
#     """
#
#     def __init__(self, *elements, **kwargs):
#         super(Repetition, self).__init__(*elements, **kwargs)
#         self.sep = kwargs.get('sep', None)
#
#
# class ZeroOrMore(Repetition):
#     """
#     ZeroOrMore will try to match the parser expression zero or more
#     times. It will never fail.
#     """
#
#     def _parse(self, parser):
#         init_pos = parser.pos
#         end_pos = -1
#         children = []
#
#         while True:
#             current_pos = parser.pos
#
#             # maybe eat the separator if needed
#             if self.sep and children:
#                 sep_result = self.sep.parse(parser)
#                 if sep_result is None:
#                     parser.seek(current_pos)
#                     break
#
#             # eat the repeated expression
#             node = self.nodes[0].parse(parser)
#             if node is None:
#                 parser.seek(current_pos)
#                 break
#             else:
#                 if node.end != -1:  # because returns -1 when no match
#                     children.append(node)
#                     end_pos = node.end
#
#         if len(children) == 0:
#             return NonTerminalNode(self, init_pos, -1, [], [])
#
#         return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)
#
#     def __repr__(self):
#         to_str = ", ".join(repr(n) for n in self.elements)
#         return self.add_rule_name_if_needed(f"({to_str})*")
#
#
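The separator handling above follows a common pattern; a self-contained sketch of it (illustrative names, not the class itself):

# After the first item, a separator must match before each further item;
# a failed separator or item rewinds to the last good position instead of
# failing the whole repetition.
def zero_or_more(item, sep, tokens, pos):
    count = 0
    while True:
        current = pos
        if sep is not None and count:
            current = sep(tokens, current)
            if current is None:
                break  # no separator: stop before it, keeping pos
        new_pos = item(tokens, current)
        if new_pos is None:
            break
        pos, count = new_pos, count + 1
    return pos, count

match_x = lambda tokens, pos: pos + 1 if tokens[pos:pos + 1] == ["x"] else None
match_comma = lambda tokens, pos: pos + 1 if tokens[pos:pos + 1] == [","] else None
print(zero_or_more(match_x, match_comma, ["x", ",", "x", "y"], 0))  # -> (3, 2)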
# class OneOrMore(Repetition):
#     """
#     OneOrMore will try to match the parser expression one or more times.
#     """
#
#     def _parse(self, parser):
#         init_pos = parser.pos
#         end_pos = -1
#         children = []
#
#         while True:
#             current_pos = parser.pos
#
#             # maybe eat the separator if needed
#             if self.sep and children:
#                 sep_result = self.sep.parse(parser)
#                 if sep_result is None:
#                     parser.seek(current_pos)
#                     break
#
#             # eat the repeated expression
#             node = self.nodes[0].parse(parser)
#             if node is None:
#                 parser.seek(current_pos)
#                 break
#             else:
#                 if node.end != -1:  # because returns -1 when no match
#                     children.append(node)
#                     end_pos = node.end
#
#         if len(children) == 0:  # if nothing is found, it's an error
#             return None
#
#         return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)
#
#     def __repr__(self):
#         to_str = ", ".join(repr(n) for n in self.elements)
#         return self.add_rule_name_if_needed(f"({to_str})+")
#
#
# class UnorderedGroup(Repetition):
#     """
#     Will try to match all of the parsing expressions, in any order.
#     """
#
#     def _parse(self, parser):
#         raise NotImplementedError()
#
#     # def __repr__(self):
#     #     to_str = ", ".join(repr(n) for n in self.elements)
#     #     return f"({to_str})#"
#
#
# class Match(ParsingExpression):
#     """
#     Base class for all classes that will try to match something from the input.
#     """
#
#     def __init__(self, rule_name, root=False):
#         super(Match, self).__init__(rule_name=rule_name, root=root)
#
#     def parse(self, parser):
#         result = self._parse(parser)
#         return result
#
#
# class StrMatch(Match):
#     """
#     Matches a literal.
#     """
#
#     def __init__(self, to_match, rule_name="", ignore_case=True):
#         super(Match, self).__init__(rule_name=rule_name)
#         self.to_match = to_match
#         self.ignore_case = ignore_case
#
#     def __repr__(self):
#         return self.add_rule_name_if_needed(f"'{self.to_match}'")
#
#     def __eq__(self, other):
#         if not super().__eq__(other):
#             return False
#
#         if not isinstance(other, StrMatch):
#             return False
#
#         return self.to_match == other.to_match and self.ignore_case == other.ignore_case
#
#     def _parse(self, parser):
#         token = parser.get_token()
#         m = str(token.value).lower() == self.to_match.lower() if self.ignore_case \
#             else token.value == self.to_match
#
#         if m:
#             node = TerminalNode(self, parser.pos, parser.pos, token.value)
#             parser.next_token()
#             return node
#
#         return None
#
#
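The comparison in StrMatch._parse is worth noting: token values are coerced to str before a case-insensitive comparison, unless ignore_case is disabled. A trivial standalone sketch:

def str_match(value, to_match, ignore_case=True):
    # mirrors the branch above: lowercase both sides unless case matters
    if ignore_case:
        return str(value).lower() == to_match.lower()
    return value == to_match

print(str_match("PLUS", "plus"))         # -> True
print(str_match("PLUS", "plus", False))  # -> False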
# class BnfNodeParser(BaseParser):
#     def __init__(self, **kwargs):
#         super().__init__("BnfNode_old", 50)
#         self.enabled = False
#         if 'grammars' in kwargs:
#             self.concepts_grammars = kwargs.get("grammars")
#         elif 'sheerka' in kwargs:
#             self.concepts_grammars = kwargs.get("sheerka").concepts_grammars
#         else:
#             self.concepts_grammars = {}
#
#         self.ignore_case = True
#
#         self.token = None
#         self.pos = -1
#         self.tokens = None
#
#         self.context = None
#         self.text = None
#         self.sheerka = None
#
#     def add_error(self, error, next_token=True):
#         self.error_sink.append(error)
#         if next_token:
#             self.next_token()
#         return error
#
#     def reset_parser(self, context, text):
#         self.context = context
#         self.sheerka = context.sheerka
#         self.text = text
#
#         try:
#             self.tokens = list(self.get_input_as_tokens(text))
#         except core.tokenizer.LexerError as e:
#             self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
#             return False
#
#         self.token = None
#         self.pos = -1
#         self.next_token(False)
#         return True
#
#     def get_token(self) -> Token:
#         return self.token
#
#     def next_token(self, skip_whitespace=True):
#         if self.token and self.token.type == TokenKind.EOF:
#             return False
#
#         self.pos += 1
#         self.token = self.tokens[self.pos]
#
#         if skip_whitespace:
#             while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
#                 self.pos += 1
#                 self.token = self.tokens[self.pos]
#
#         return self.token.type != TokenKind.EOF
#
#     def seek(self, pos):
#         self.pos = pos
#         self.token = self.tokens[self.pos]
#         return True
#
#     def rewind(self, offset, skip_whitespace=True):
#         self.pos += offset
#         self.token = self.tokens[self.pos]
#
#         if skip_whitespace:
#             while self.pos > 0 and (self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE):
#                 self.pos -= 1
#                 self.token = self.tokens[self.pos]
#
#     def initialize(self, context, concepts_definitions):
#         """
#         Adds a bunch of concepts, and how they can be recognized.
#         :param context: execution context
#         :param concepts_definitions: dictionary of concept, concept_definition
#         :return:
#         """
#
#         self.context = context
#         self.sheerka = context.sheerka
#         concepts_to_resolve = set()
#
#         for concept, concept_def in concepts_definitions.items():
#             # ## Gets the grammars
#             context.log(f"Resolving grammar for '{concept}'", context.who)
#             concept.init_key()  # make sure that the key is initialized
#             grammar = self.get_model(concept_def, concepts_to_resolve)
#             self.concepts_grammars[concept] = grammar
#
#         if self.has_error:
#             return self.sheerka.ret(self.name, False, self.error_sink)
#
#         # ## Removes concepts with infinite recursions
#         concepts_to_remove = self.detect_infinite_recursion(concepts_to_resolve)
#         for concept in concepts_to_remove:
#             concepts_to_resolve.remove(concept)
#             del self.concepts_grammars[concept]
#
#         if self.has_error:
#             return self.sheerka.ret(self.name, False, self.error_sink)
#         else:
#             return self.sheerka.ret(self.name, True, self.concepts_grammars)
#
#     def get_concept(self, concept_name):
#         if concept_name in self.context.concepts:
#             return self.context.concepts[concept_name]
#         return self.sheerka.get_by_key(concept_name)
#
#     def get_model(self, concept_def, concepts_to_resolve):
#
#         # TODO
#         # inner_get_model must not modify the initial ParsingExpression
#         # A copy must be created
#         def inner_get_model(expression):
#             if isinstance(expression, Concept):
#                 if self.sheerka.isaset(self.context, expression):
#                     ret = ConceptGroupExpression(expression, rule_name=expression.name)
#                 else:
#                     ret = ConceptExpression(expression, rule_name=expression.name)
#                 concepts_to_resolve.add(expression)
#             elif isinstance(expression, ConceptExpression):  # it includes ConceptGroupExpression
#                 if expression.rule_name is None or expression.rule_name == "":
#                     expression.rule_name = expression.concept.name if isinstance(expression.concept, Concept) \
#                         else expression.concept
#                 if isinstance(expression.concept, str):
#                     concept = self.get_concept(expression.concept)
#                     if self.sheerka.is_known(concept):
#                         expression.concept = concept
#                 concepts_to_resolve.add(expression.concept)
#                 ret = expression
#             elif isinstance(expression, str):
#                 ret = StrMatch(expression, ignore_case=self.ignore_case)
#             elif isinstance(expression, StrMatch):
#                 ret = expression
#                 if ret.ignore_case is None:
#                     ret.ignore_case = self.ignore_case
#             elif isinstance(expression, Sequence) or \
#                     isinstance(expression, OrderedChoice) or \
#                     isinstance(expression, ZeroOrMore) or \
#                     isinstance(expression, OneOrMore) or \
#                     isinstance(expression, Optional):
#                 ret = expression
#                 ret.nodes = [inner_get_model(e) for e in ret.elements]
#             else:
#                 ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False)
#
#             # Translate separator expression.
#             if isinstance(expression, Repetition) and expression.sep:
#                 expression.sep = inner_get_model(expression.sep)
#
#             return ret
#
#         model = inner_get_model(concept_def)
#
#         return model
#
#     def detect_infinite_recursion(self, concepts_to_resolve):
#
#         # infinite recursion matcher
#         def _is_infinite_recursion(ref_concept, node):
#             if isinstance(node, ConceptExpression):
#                 if node.concept == ref_concept:
#                     return True
#
#                 if isinstance(node.concept, str):
#                     to_match = self.get_concept(node.concept)
#                     if self.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
#                         return False
#                 else:
#                     to_match = node.concept
#
#                 if to_match not in self.concepts_grammars:
#                     return False
#
#                 return _is_infinite_recursion(ref_concept, self.concepts_grammars[to_match])
#
#             if isinstance(node, OrderedChoice):
#                 return _is_infinite_recursion(ref_concept, node.nodes[0])
#
#             if isinstance(node, Sequence):
#                 for node in node.nodes:
#                     if _is_infinite_recursion(ref_concept, node):
#                         return True
#                 return False
#
#             return False
#
#         removed_concepts = []
#         for e in concepts_to_resolve:
#             if isinstance(e, str):
#                 e = self.get_concept(e)
#                 if self.sheerka.isinstance(e, BuiltinConcepts.UNKNOWN_CONCEPT):
#                     continue
#
#             if e not in self.concepts_grammars:
#                 continue
#
#             to_resolve = self.concepts_grammars[e]
#             if _is_infinite_recursion(e, to_resolve):
#                 removed_concepts.append(e)
#         return removed_concepts
#
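The idea behind this check can be shown with a much simpler standalone analogue (plain dictionaries, hypothetical rule names; not the method above): a rule whose first reachable element refers back to itself can never make progress, so it is dropped from the grammar.

def is_left_recursive(grammars, name, seen=()):
    # follow the first element of each rule; a cycle back to a seen rule
    # means the rule would recurse before consuming any token
    body = grammars.get(name)
    if body is None or name in seen:
        return name in seen
    first = body[0]  # first element of the rule's sequence
    if isinstance(first, str) and first in grammars:
        return is_left_recursive(grammars, first, seen + (name,))
    return False

grammars = {"expr": ("expr", "'plus'", "term"), "term": ("'x'",)}
print(is_left_recursive(grammars, "expr"))  # -> True: 'expr' starts with itself
print(is_left_recursive(grammars, "term"))  # -> False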
#     def parse(self, context, parser_input):
#         if parser_input == "":
#             return context.sheerka.ret(
#                 self.name,
#                 False,
#                 context.sheerka.new(BuiltinConcepts.IS_EMPTY)
#             )
#
#         if not self.reset_parser(context, parser_input):
#             return self.sheerka.ret(
#                 self.name,
#                 False,
#                 context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
#
#         concepts_found = [[]]
#         unrecognized_tokens = None
#         has_unrecognized = False
#
#         # concepts_found is actually a list of lists.
#         # The first dimension is the number of possibilities found.
#         # The second dimension is the number of concepts found, under one possibility.
#         #
#         # Example 1
#         #     concept foo : 'one' 'two'
#         #     concept bar : 'one' 'two'
#         #     input 'one two' -> will produce two possibilities (foo and bar).
#         #
#         # Example 2
#         #     concept foo : 'one'
#         #     concept bar : 'two'
#         #     input 'one two' -> will produce one possibility, which is (foo, bar) (foo then bar)
#
#         while True:
#             init_pos = self.pos
#             res = []
#
#             for concept, grammar in self.concepts_grammars.items():
#                 self.seek(init_pos)
#                 node = grammar.parse(self)  # a node is TerminalNode or NonTerminalNode
#                 if node is not None and node.end != -1:
#                     updated_concept = self.finalize_concept(context.sheerka, concept, node)
#                     concept_node = ConceptNode(
#                         updated_concept,
#                         node.start,
#                         node.end,
#                         self.tokens[node.start: node.end + 1],
#                         None,
#                         node)
#                     res.append(concept_node)
#
#             if len(res) == 0:  # not recognized
#                 self.seek(init_pos)
#                 if unrecognized_tokens:
#                     unrecognized_tokens.add_token(self.get_token(), init_pos)
#                 else:
#                     unrecognized_tokens = UnrecognizedTokensNode(init_pos, init_pos, [self.get_token()])
#
#                 if not self.next_token(False):
#                     break
#
#             else:  # some concepts are recognized
#                 if unrecognized_tokens and unrecognized_tokens.not_whitespace():
#                     unrecognized_tokens.fix_source()
#                     concepts_found = core.utils.product(concepts_found, [unrecognized_tokens])
#                     has_unrecognized = True
#                 unrecognized_tokens = None
#
#                 res = self.get_bests(res)  # only keep the concepts that eat the most tokens
#                 concepts_found = core.utils.product(concepts_found, res)
#
#                 # loop
#                 self.seek(res[0].end)
#                 if not self.next_token(False):
#                     break
#
#         # Fix the source for unrecognized tokens
#         if unrecognized_tokens and unrecognized_tokens.not_whitespace():
#             unrecognized_tokens.fix_source()
#             concepts_found = core.utils.product(concepts_found, [unrecognized_tokens])
#             has_unrecognized = True
#
#         # else
#         # returns as many ReturnValue as choices found
#         ret = []
#         for choice in concepts_found:
#             ret.append(
#                 self.sheerka.ret(
#                     self.name,
#                     not has_unrecognized,
#                     self.sheerka.new(
#                         BuiltinConcepts.PARSER_RESULT,
#                         parser=self,
#                         source=parser_input,
#                         body=choice,
#                         try_parsed=choice)))
#
#         if len(ret) == 1:
#             self.log_result(context, parser_input, ret[0])
#             return ret[0]
#         else:
#             self.log_multiple_results(context, parser_input, ret)
#             return ret
#
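The possibility bookkeeping above hinges on core.utils.product, whose semantics are assumed here (from its usage) to extend each accumulated possibility with each new alternative, a cartesian-product step. A sketch under that assumption:

def product(possibilities, alternatives):
    # each existing possibility branches once per alternative
    return [p + [a] for p in possibilities for a in alternatives]

found = [[]]
found = product(found, ["foo", "bar"])  # two readings of the same span (Example 1)
found = product(found, ["baz"])         # a following concept shared by both
print(found)  # -> [['foo', 'baz'], ['bar', 'baz']]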
#     def finalize_concept(self, sheerka, template, underlying, init_empty_body=True):
#         """
#         Updates the properties of the concept.
#         Recurses if the property is a concept.
#         """
#
#         # this cache is to make sure that we return the same concept for the same ConceptExpression
#         _underlying_value_cache = {}
#
#         def _add_prop(_concept, prop_name, value):
#             """
#             Adds a new entry;
#             makes a list if the property already exists.
#             """
#             if prop_name not in _concept.compiled or _concept.compiled[prop_name] is None:
#                 # new entry
#                 _concept.compiled[prop_name] = value
#             else:
#                 # make a list if there was a value
#                 previous_value = _concept.compiled[prop_name]
#                 if isinstance(previous_value, list):
#                     previous_value.append(value)
#                 else:
#                     new_value = [previous_value, value]
#                     _concept.compiled[prop_name] = new_value
#
#         def _look_for_concept_match(_underlying):
#             """
#             At some point, there is either a StrMatch or a concept match
#             that allowed the recognition.
#             Look for the concept match, with recursion if needed.
#             """
#             if isinstance(_underlying.parsing_expression, ConceptExpression):
#                 return _underlying
#
#             if not isinstance(_underlying, NonTerminalNode):
#                 return None
#
#             if len(_underlying.children) != 1:
#                 return None
#
#             return _look_for_concept_match(_underlying.children[0])
#
#         def _get_underlying_value(_underlying):
#             concept_match_node = _look_for_concept_match(_underlying)
#             if concept_match_node:
#                 # the value is a concept
#                 if id(concept_match_node) in _underlying_value_cache:
#                     result = _underlying_value_cache[id(concept_match_node)]
#                 else:
#                     ref_tpl = concept_match_node.parsing_expression.concept
#                     result = self.finalize_concept(sheerka, ref_tpl, concept_match_node.children[0], init_empty_body)
#                     _underlying_value_cache[id(concept_match_node)] = result
#             else:
#                 # the value is a string
#                 result = DoNotResolve(_underlying.source)
#
#             return result
#
#         def _process_rule_name(_concept, _underlying):
#             if _underlying.parsing_expression.rule_name:
#                 value = _get_underlying_value(_underlying)
#                 _add_prop(_concept, _underlying.parsing_expression.rule_name, value)
#                 _concept.metadata.need_validation = True
#
#             if isinstance(_underlying, NonTerminalNode):
#                 for child in _underlying.children:
#                     _process_rule_name(_concept, child)
#
#         key = (template.key, template.id) if template.id else template.key
#         concept = sheerka.new(key)
#         if init_empty_body and concept.metadata.body is None:
#             value = _get_underlying_value(underlying)
#             concept.compiled[ConceptParts.BODY] = value
#             if underlying.parsing_expression.rule_name:
#                 _add_prop(concept, underlying.parsing_expression.rule_name, value)
#                 # KSI : Why don't we set concept.metadata.need_validation to True ?
#
#         if isinstance(underlying, NonTerminalNode):
#             for node in underlying.children:
#                 _process_rule_name(concept, node)
#
#         return concept
#
#     def encode_grammar(self, grammar):
#         """
#         Transforms the grammar into something that can easily be serialized.
#         :param grammar:
#         :return:
#         """
#
#         def _encode(expression):
#             if isinstance(expression, StrMatch):
#                 res = f"'{expression.to_match}'"
#
#             elif isinstance(expression, ConceptExpression):
#                 res = core.utils.str_concept(expression.concept)
#
#             elif isinstance(expression, Sequence):
#                 res = "(" + " ".join(_encode(c) for c in expression.nodes) + ")"
#
#             elif isinstance(expression, OrderedChoice):
#                 res = "(" + "|".join(_encode(c) for c in expression.nodes) + ")"
#
#             elif isinstance(expression, Optional):
#                 res = _encode(expression.nodes[0]) + "?"
#
#             elif isinstance(expression, ZeroOrMore):
#                 res = _encode(expression.nodes[0]) + "*"
#
#             elif isinstance(expression, OneOrMore):
#                 res = _encode(expression.nodes[0]) + "+"
#
#             if expression.rule_name:
#                 res += "=" + expression.rule_name
#
#             return res
#
#         result = {}
#         for k, v in grammar.items():
#             key = core.utils.str_concept(k)
#             value = _encode(v)
#             result[key] = value
#         return result
#
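A standalone sketch of the serialization format produced above: nested combinators flatten to a compact BNF-like string (with "=rule_name" appended where a rule name is set). The tuple encoding here is purely illustrative, mirroring the branches of _encode:

def encode(expr):
    kind, *rest = expr  # ('str', "plus") / ('seq', [...]) / ('choice', [...])
    if kind == 'str':
        return f"'{rest[0]}'"
    if kind == 'seq':
        return "(" + " ".join(encode(c) for c in rest[0]) + ")"
    if kind == 'choice':
        return "(" + "|".join(encode(c) for c in rest[0]) + ")"

print(encode(('seq', [('str', 'one'), ('choice', [('str', 'two'), ('str', 'three')])])))
# -> ('one' ('two'|'three'))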
#     @staticmethod
#     def get_bests(results):
#         """
#         Returns the results that match the most tokens (the longest ones).
#         :param results:
#         :return:
#         """
#         by_end_pos = defaultdict(list)
#         for result in results:
#             by_end_pos[result.end].append(result)
#
#         return by_end_pos[max(by_end_pos)]
#
#
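The longest-match selection in get_bests is small enough to demonstrate directly (a self-contained sketch over (name, end_pos) tuples instead of concept nodes):

from collections import defaultdict

def get_bests(results):
    # bucket results by end position; the bucket with the highest end wins,
    # so ties between equally long matches are all kept
    by_end_pos = defaultdict(list)
    for result in results:
        by_end_pos[result[1]].append(result)
    return by_end_pos[max(by_end_pos)]

print(get_bests([("foo", 3), ("bar", 3), ("baz", 1)]))
# -> [('foo', 3), ('bar', 3)]  (both survive; 'baz' is shorter)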
# class ParsingExpressionVisitor:
#     """
#     Visits a ParsingExpression tree.
#     """
#
#     def visit(self, parsing_expression):
#         name = parsing_expression.__class__.__name__
#
#         method = 'visit_' + name
#         visitor = getattr(self, method, self.generic_visit)
#         return visitor(parsing_expression)
#
#     def generic_visit(self, parsing_expression):
#         if hasattr(self, "visit_all"):
#             self.visit_all(parsing_expression)
#
#         for node in parsing_expression.elements:
#             if isinstance(node, Concept):
#                 self.visit(ConceptExpression(node.key or node.name))
#             elif isinstance(node, str):
#                 self.visit(StrMatch(node))
#             else:
#                 self.visit(node)
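The getattr-based dispatch used by this visitor is a standard pattern; a self-contained sketch (illustrative classes, not the one above):

class Visitor:
    def visit(self, node):
        # look up 'visit_' + class name, fall back to generic_visit
        method = getattr(self, 'visit_' + type(node).__name__, self.generic_visit)
        return method(node)

    def generic_visit(self, node):
        return f"generic: {node!r}"

    def visit_str(self, node):
        return f"string literal: {node!r}"

v = Visitor()
print(v.visit("plus"))  # -> string literal: 'plus'
print(v.visit(42))      # -> generic: 42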
@@ -0,0 +1,108 @@
# # try to match something like
# # ConceptNode 'plus' ConceptNode
# #
# # Replaced by SyaNodeParser
# from core.builtin_concepts import BuiltinConcepts
# from core.tokenizer import TokenKind, Token
# from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode
# from parsers.BaseParser import BaseParser
# from parsers.MultipleConceptsParser import MultipleConceptsParser
# from core.concept import VARIABLE_PREFIX
#
# multiple_concepts_parser = MultipleConceptsParser()
#
#
# class ConceptsWithConceptsParser(BaseParser):
#     def __init__(self, **kwargs):
#         super().__init__("ConceptsWithConcepts", 25)
#         self.enabled = False
#
#     @staticmethod
#     def get_tokens(nodes):
#         tokens = []
#
#         for node in nodes:
#             if isinstance(node, ConceptNode):
#                 index, line, column = node.tokens[0].index, node.tokens[0].line, node.tokens[0].column
#                 tokens.append(Token(TokenKind.CONCEPT, node.concept, index, line, column))
#             else:
#                 for token in node.tokens:
#                     if token.type == TokenKind.EOF:
#                         break
#                     elif token.type in (TokenKind.NEWLINE, TokenKind.WHITESPACE):
#                         continue
#                     else:
#                         tokens.append(token)
#
#         return tokens
#
#     @staticmethod
#     def get_key(nodes):
#         key = ""
#         index = 0
#         for node in nodes:
#             if key:
#                 key += " "
#
#             if isinstance(node, UnrecognizedTokensNode):
#                 key += node.source.strip()
#             else:
#                 key += f"{VARIABLE_PREFIX}{index}"
#                 index += 1
#
#         return key
#
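A standalone sketch of the key built by get_key: recognized nodes become numbered variable placeholders while unrecognized source text is kept verbatim. The "$" prefix below is an assumption for illustration; the actual value of VARIABLE_PREFIX lives in core.concept.

def make_key(parts, prefix="$"):
    # parts: (True, concept) for recognized nodes, (False, raw source) otherwise
    key, index = [], 0
    for recognized, text in parts:
        if recognized:
            key.append(f"{prefix}{index}")
            index += 1
        else:
            key.append(text.strip())
    return " ".join(key)

print(make_key([(True, "a"), (False, " plus "), (True, "b")]))  # -> $0 plus $1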
#     def finalize_concept(self, context, concept, nodes):
#         index = 0
#         for node in nodes:
#
#             if isinstance(node, ConceptNode):
#                 prop_name = list(concept.props.keys())[index]
#                 concept.compiled[prop_name] = node.concept
#                 context.log(
#                     f"Setting property '{prop_name}'='{node.concept}'.",
#                     self.name)
#                 index += 1
#             elif isinstance(node, SourceCodeNode):
#                 prop_name = list(concept.props.keys())[index]
#                 sheerka = context.sheerka
#                 value = sheerka.new(BuiltinConcepts.PARSER_RESULT, parser=self, source=node.source, body=node.node)
#                 concept.compiled[prop_name] = [context.sheerka.ret(self.name, True, value)]
#                 context.log(
#                     f"Setting property '{prop_name}'='Python({node.source})'.",
#                     self.name)
#                 index += 1
#
#         return concept
#
#     def parse(self, context, parser_input):
#         sheerka = context.sheerka
#         nodes = self.get_input_as_lexer_nodes(parser_input, multiple_concepts_parser)
#         if not nodes:
#             return None
#
#         concept_key = self.get_key(nodes)
#         concept = sheerka.new(concept_key)
#         if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
#             return sheerka.ret(
#                 self.name,
#                 False,
#                 sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.body))
#
#         concepts = concept if hasattr(concept, "__iter__") else [concept]
#         for concept in concepts:
#             self.finalize_concept(context, concept, nodes)
#
#         res = []
#         for concept in concepts:
#             res.append(sheerka.ret(
#                 self.name,
#                 True,
#                 sheerka.new(
#                     BuiltinConcepts.PARSER_RESULT,
#                     parser=self,
#                     source=parser_input.source,
#                     body=concept,
#                     try_parsed=None)))
#
#         return res[0] if len(res) == 1 else res
@@ -0,0 +1,163 @@
# # to be replaced by SyaNodeParser
# import ast
#
# from core.builtin_concepts import BuiltinConcepts
# from core.tokenizer import TokenKind
# from parsers.BaseNodeParser import SourceCodeNode
# from parsers.BaseParser import BaseParser
# from parsers.BnfNodeParser import BnfNodeParser, UnrecognizedTokensNode, ConceptNode
# import core.utils
# from parsers.PythonParser import PythonParser
#
# concept_lexer_parser = BnfNodeParser()
#
#
# class MultipleConceptsParser(BaseParser):
#     """
#     Parser that takes the result of BnfNodeParser and
#     tries to resolve the unrecognized tokens, token by token.
#
#     It is a success when it returns a list of ConceptNodes exclusively.
#     """
#
#     def __init__(self, **kwargs):
#         BaseParser.__init__(self, "MultipleConcepts", 45)
#         self.enabled = False
#
#     @staticmethod
#     def finalize(nodes_found, unrecognized_tokens):
#         if not unrecognized_tokens:
#             return nodes_found, unrecognized_tokens
#
#         unrecognized_tokens.fix_source()
#         if unrecognized_tokens.not_whitespace():
#             nodes_found = core.utils.product(nodes_found, [unrecognized_tokens])
#
#         return nodes_found, None
#
#     @staticmethod
#     def create_or_add(unrecognized_tokens, token, index):
#         if unrecognized_tokens:
#             unrecognized_tokens.add_token(token, index)
#         else:
#             unrecognized_tokens = UnrecognizedTokensNode(index, index, [token])
#         return unrecognized_tokens
#
#     def parse(self, context, parser_input):
#         sheerka = context.sheerka
#         nodes = self.get_input_as_lexer_nodes(parser_input, concept_lexer_parser)
#         if not nodes:
#             return None
#
#         nodes_found = [[]]
#         concepts_only = True
#
#         for node in nodes:
#             if isinstance(node, UnrecognizedTokensNode):
#                 unrecognized_tokens = None
#                 i = 0
#
#                 while i < len(node.tokens):
#
#                     token_index = node.start + i
#                     token = node.tokens[i]
#
#                     concepts_nodes = self.get_concepts_nodes(context, token_index, token)
#                     if concepts_nodes is not None:
#                         nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
#                         nodes_found = core.utils.product(nodes_found, concepts_nodes)
#                         i += 1
#                         continue
#
#                     source_code_node = self.get_source_code_node(context, token_index, node.tokens[i:])
#                     if source_code_node:
#                         nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
#                         nodes_found = core.utils.product(nodes_found, [source_code_node])
#                         i += len(source_code_node.tokens)
#                         continue
#
#                     # not a concept nor some source code
#                     unrecognized_tokens = self.create_or_add(unrecognized_tokens, token, token_index)
#                     concepts_only &= token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE)
#                     i += 1
#
#                 # finish processing if needed
#                 nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
#
#             else:
#                 nodes_found = core.utils.product(nodes_found, [node])
#
#         ret = []
#         for choice in nodes_found:
#             ret.append(
#                 sheerka.ret(
#                     self.name,
#                     concepts_only,
#                     sheerka.new(
#                         BuiltinConcepts.PARSER_RESULT,
#                         parser=self,
#                         source=parser_input.source,
#                         body=choice,
#                         try_parsed=None))
#             )
#
#         if len(ret) == 1:
#             self.log_result(context, parser_input.source, ret[0])
#             return ret[0]
#         else:
#             self.log_multiple_results(context, parser_input.source, ret)
#             return ret
#
#     @staticmethod
#     def get_concepts_nodes(context, index, token):
#         """
#         Tries to recognize a concept
#         from the universe of all known concepts.
#         """
#
#         if token.type != TokenKind.IDENTIFIER:
#             return None
#
#         concept = context.new_concept(token.value)
#         if hasattr(concept, "__iter__") or context.sheerka.is_known(concept):
#             concepts = concept if hasattr(concept, "__iter__") else [concept]
#             concepts_nodes = [ConceptNode(c, index, index, [token], token.value) for c in concepts]
#             return concepts_nodes
#
#         return None
#
#     @staticmethod
#     def get_source_code_node(context, index, tokens):
#         """
#         Tries to recognize source code.
#         For the time being, only Python is supported.
#         :param context:
#         :param tokens:
#         :param index:
#         :return:
#         """
#
#         if len(tokens) == 0 or (len(tokens) == 1 and tokens[0].type == TokenKind.EOF):
#             return None
#
#         end_index = len(tokens)
#         while end_index > 0:
#             parser = PythonParser()
#             tokens_to_parse = tokens[:end_index]
#             res = parser.parse(context, tokens_to_parse)
#             if res.status:
#                 # only expressions are accepted
#                 ast_ = res.value.value.ast_
#                 if not isinstance(ast_, ast.Expression):
#                     return None
#                 try:
#                     compiled = compile(ast_, "<string>", "eval")
#                     eval(compiled, {}, {})
#                 except Exception:
#                     return None
#
#                 source = BaseParser.get_text_from_tokens(tokens_to_parse)
#                 return SourceCodeNode(res.value.value, index, index + end_index - 1, tokens_to_parse, source)
#             end_index -= 1
#
#         return None
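The acceptance test used above (parse as an expression, then compile and evaluate) can be shown standalone, on raw strings instead of token lists (a sketch, not the method itself):

import ast

def is_python_expression(source):
    # accepted only if it parses as a single Python expression and
    # evaluates without raising
    try:
        tree = ast.parse(source, mode="eval")  # ast.Expression or SyntaxError
        eval(compile(tree, "<string>", "eval"), {}, {})
        return True
    except Exception:
        return False

print(is_python_expression("1 + 2 * 3"))  # -> True
print(is_python_expression("x +"))        # -> False (syntax error)
print(is_python_expression("1 / 0"))      # -> False (raises at eval)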