Fixed SyaNodeParser false positive recognition issue
This commit is contained in:
@@ -349,9 +349,6 @@ class EnumerationConcept(Concept):
|
||||
self.set_value(ConceptParts.BODY, iteration)
|
||||
self.metadata.is_evaluated = True
|
||||
|
||||
# def __iter__(self):
|
||||
# return iter(self.body)
|
||||
|
||||
|
||||
class ListConcept(Concept):
|
||||
def __init__(self, items=None):
|
||||
@@ -362,21 +359,6 @@ class ListConcept(Concept):
|
||||
def append(self, obj):
|
||||
self.body.append(obj)
|
||||
|
||||
# def __len__(self):
|
||||
# return len(self.body)
|
||||
#
|
||||
# def __getitem__(self, key):
|
||||
# return self.body[key]
|
||||
#
|
||||
# def __setitem__(self, key, value):
|
||||
# self.body[key] = value
|
||||
#
|
||||
# def __iter__(self):
|
||||
# return iter(self.body)
|
||||
#
|
||||
# def __contains__(self, item):
|
||||
# return item in self.body
|
||||
|
||||
|
||||
class FilteredConcept(Concept):
|
||||
def __init__(self, filtered=None, iterable=None, predicate=None):
|
||||
@@ -450,5 +432,5 @@ class ExplanationConcept(Concept):
|
||||
self.set_value("command", command) # explain command parameters
|
||||
self.set_value("title", title) # a title to the explanation
|
||||
self.set_value("instructions", instructions) # instructions for SheerkaPrint
|
||||
self.set_value(ConceptParts.BODY, execution_result) # list of results
|
||||
self.set_value(ConceptParts.BODY, execution_result) # list of results
|
||||
self.metadata.is_evaluated = True
|
||||
|
||||
@@ -326,6 +326,7 @@ def ensure_evaluated(context, concept):
|
||||
|
||||
return evaluated
|
||||
|
||||
|
||||
def get_lexer_nodes_from_unrecognized(context, unrecognized_tokens_node, parsers):
|
||||
"""
|
||||
Using parsers, try to recognize concepts from source
|
||||
|
||||
+3
-3
@@ -221,7 +221,7 @@ class Concept:
|
||||
Create the key for this concept.
|
||||
Must be called only when the concept is fully initialized
|
||||
|
||||
The method is not called set_key to make sure that no other class set the key by mistake
|
||||
The method is not called 'set_key' to make sure that no other class set the key by mistake
|
||||
:param tokens:
|
||||
:return:
|
||||
"""
|
||||
@@ -248,8 +248,8 @@ class Concept:
|
||||
if token.value in variables:
|
||||
key += VARIABLE_PREFIX + str(variables.index(token.value))
|
||||
else:
|
||||
value = token.value[1:-1] if token.type == TokenKind.STRING else token.value
|
||||
key += value
|
||||
#value = token.value[1:-1] if token.type == TokenKind.STRING else token.value
|
||||
key += token.value
|
||||
first = False
|
||||
|
||||
self.metadata.key = key
|
||||
|
||||
@@ -56,12 +56,6 @@ class SheerkaCreateNewConcept:
|
||||
return sheerka.ret(self.logger_name, False, ErrorConcept(init_ret_value.value))
|
||||
resolved_concepts_by_first_keyword = init_ret_value.body
|
||||
|
||||
# update concept definition by key
|
||||
# init_sya_ret_value = self.bnp.initialize(context, [concept], use_sheerka=True)
|
||||
# if not init_sya_ret_value.status:
|
||||
# return sheerka.ret(self.logger_name, False, ErrorConcept(init_sya_ret_value.value))
|
||||
# concepts_by_first_keyword = init_sya_ret_value.body
|
||||
|
||||
concept.freeze_definition_hash()
|
||||
|
||||
cache_manager.add_concept(concept)
|
||||
@@ -74,21 +68,3 @@ class SheerkaCreateNewConcept:
|
||||
# process the return if needed
|
||||
ret = sheerka.ret(self.logger_name, True, sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=concept))
|
||||
return ret
|
||||
|
||||
# def load_concepts_nodes_definitions(self, context):
|
||||
# """
|
||||
# Gets from sdp what is need to parse nodes
|
||||
# :return:
|
||||
# """
|
||||
# sdp = self.sheerka.sdp
|
||||
#
|
||||
# concepts_by_first_keyword = sdp.get(
|
||||
# self.sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY,
|
||||
# load_origin=False) or {}
|
||||
#
|
||||
# init_ret_value = self.bnp.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
|
||||
# if not init_ret_value.status:
|
||||
# return self.sheerka.ret(self.logger_name, False, ErrorConcept(init_ret_value.value))
|
||||
# resolved_concepts_by_first_keyword = init_ret_value.body
|
||||
#
|
||||
# return concepts_by_first_keyword, resolved_concepts_by_first_keyword
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
|
||||
import core.utils
|
||||
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
|
||||
|
||||
NO_MATCH = "** No Match **"
|
||||
|
||||
|
||||
class SheerkaExecute:
|
||||
"""
|
||||
Manage the execution of a process flow
|
||||
@@ -58,7 +59,8 @@ class SheerkaExecute:
|
||||
# else "'" + BaseParser.get_text_from_tokens(to_parse) + "' as tokens"
|
||||
# execution_context.log(f"Parsing {debug_text}")
|
||||
|
||||
with execution_context.push(desc=f"Parsing using {parser.name}", logger=parser.verbose_log) as sub_context:
|
||||
with execution_context.push(desc=f"Parsing using {parser.name}",
|
||||
logger=parser.verbose_log) as sub_context:
|
||||
sub_context.add_inputs(to_parse=to_parse)
|
||||
res = parser.parse(sub_context, to_parse)
|
||||
if res is not None:
|
||||
@@ -86,7 +88,6 @@ class SheerkaExecute:
|
||||
stop_processing = True
|
||||
sub_context.add_values(return_values=res)
|
||||
|
||||
|
||||
if stop_processing:
|
||||
break # Do not try the other priorities if a match is found
|
||||
|
||||
|
||||
@@ -35,7 +35,7 @@ class SheerkaModifyConcept:
|
||||
|
||||
# TODO : update concept by first keyword
|
||||
# TODO : update resolved by first keyword
|
||||
# TODO : update concets grammars
|
||||
# TODO : update concepts grammars
|
||||
|
||||
ret = self.sheerka.ret(self.logger_name, True, self.sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=concept))
|
||||
return ret
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
from dataclasses import dataclass
|
||||
from typing import List
|
||||
|
||||
from sdp.sheerkaSerializer import Serializer
|
||||
|
||||
|
||||
@dataclass
|
||||
class Variable:
|
||||
|
||||
@@ -60,10 +60,6 @@ class Sheerka(Concept):
|
||||
|
||||
self.bnp = None # reference to the BaseNodeParser class (to compute first keyword token)
|
||||
|
||||
# # Cache for concepts grammars
|
||||
# # To be shared between BNFNode parsers instances
|
||||
# self.concepts_grammars = {}
|
||||
|
||||
# a concept can be instantiated
|
||||
# ex: File is a concept, but File('foo.txt') is an instance
|
||||
# TODO: manage contexts
|
||||
@@ -303,27 +299,6 @@ class Sheerka(Concept):
|
||||
res = self.bnp.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
|
||||
self.cache_manager.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, res.body)
|
||||
|
||||
# sya = self.bnf.resolve_sya_associativity_and_precedence()
|
||||
# self.cache_manager.put(self.RESOLVED_CONCEPTS_SYA_DEFINITION_ENTRY, sya)
|
||||
#
|
||||
#
|
||||
# self.concepts_by_first_keyword, \
|
||||
# self.resolved_concepts_by_first_keyword = \
|
||||
# self.create_new_concept_handler.load_concepts_nodes_definitions(context)
|
||||
|
||||
# self.concepts_by_first_keyword = self.sdp.get_safe(
|
||||
# self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY,
|
||||
# load_origin=False) or {}
|
||||
#
|
||||
# self.sya_definitions = self.sdp.get_safe(
|
||||
# self.CONCEPTS_SYA_DEFINITION_ENTRY,
|
||||
# load_origin=False) or {}
|
||||
#
|
||||
# init_ret_value = self.bnp.resolve_concepts_by_first_keyword(self, self.concepts_by_first_keyword)
|
||||
# if not init_ret_value.status:
|
||||
# return self.sheerka.ret(self.logger_name, False, ErrorConcept(init_ret_value.value))
|
||||
# self.resolved_concepts_by_first_keyword = init_ret_value.body
|
||||
|
||||
def reset(self, cache_only=False):
|
||||
self.cache_manager.clear()
|
||||
self.cache_manager.cache_only = cache_only
|
||||
@@ -346,7 +321,6 @@ class Sheerka(Concept):
|
||||
with ExecutionContext(self.key, event, self, f"Evaluating '{text}'", self.log) as execution_context:
|
||||
user_input = self.ret(self.name, True, self.new(BuiltinConcepts.USER_INPUT, body=text, user_name=user_name))
|
||||
reduce_requested = self.ret(self.name, True, self.new(BuiltinConcepts.REDUCE_REQUESTED))
|
||||
# execution_context.local_hints.add(BuiltinConcepts.EVAL_WHERE_REQUESTED)
|
||||
|
||||
steps = [
|
||||
BuiltinConcepts.BEFORE_PARSING,
|
||||
@@ -525,28 +499,6 @@ class Sheerka(Concept):
|
||||
|
||||
return concept
|
||||
|
||||
#
|
||||
# def get(self, concept_key, concept_id=None):
|
||||
# """
|
||||
# Tries to find a concept
|
||||
# What is return must be used a template for another concept.
|
||||
# You must not modify the returned concept
|
||||
# :param concept_key: key of the concept
|
||||
# :param concept_id: when multiple concepts with the same key, use the id
|
||||
# :return:
|
||||
# """
|
||||
#
|
||||
# by_key = self.get_by_key(concept_key)
|
||||
# if self.is_known(by_key):
|
||||
# return by_key
|
||||
#
|
||||
# # else return by name
|
||||
# by_name = self.get_by_name(concept_key)
|
||||
# if self.is_known(by_name):
|
||||
# return by_name
|
||||
#
|
||||
# return by_key # return not found for key
|
||||
|
||||
def get_by_key(self, concept_key, concept_id=None):
|
||||
concept_key = str(concept_key) if isinstance(concept_key, BuiltinConcepts) else concept_key
|
||||
return self.internal_get("key", concept_key, self.CONCEPTS_BY_KEY_ENTRY, concept_id)
|
||||
|
||||
+33
-3
@@ -1,4 +1,4 @@
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
|
||||
|
||||
@@ -48,6 +48,7 @@ class TokenKind(Enum):
|
||||
DEGREE = "degree" # °
|
||||
WORD = "word"
|
||||
EQUALSEQUALS = "=="
|
||||
VAR_DEF = "__var__"
|
||||
|
||||
|
||||
@dataclass()
|
||||
@@ -58,6 +59,8 @@ class Token:
|
||||
line: int
|
||||
column: int
|
||||
|
||||
_str_value: str = field(default=None, repr=False, compare=False, hash=None)
|
||||
|
||||
def __repr__(self):
|
||||
if self.type == TokenKind.IDENTIFIER:
|
||||
value = str(self.value)
|
||||
@@ -72,6 +75,23 @@ class Token:
|
||||
|
||||
return f"Token({value})"
|
||||
|
||||
@property
def str_value(self):
    """
    Text of this token, in the form used to compare tokens with each other.

    STRING tokens lose their first and last character (presumably the surrounding quotes),
    KEYWORD tokens use ``self.value.value`` and every other kind uses ``str(self.value)``.
    The result is computed once and cached in ``_str_value``.

    :return: the comparable text of the token
    """
    # Test against None rather than truthiness: an empty text is a valid cached result
    # and must not be recomputed on every access
    if self._str_value is not None:
        return self._str_value

    if self.type == TokenKind.STRING:
        self._str_value = self.value[1:-1]
    elif self.type == TokenKind.KEYWORD:
        self._str_value = self.value.value
    else:
        self._str_value = str(self.value)
    return self._str_value
|
||||
|
||||
@staticmethod
def is_whitespace(token):
    """
    Tell whether 'token' is a whitespace token.

    :param token: a token, or None
    :return: True only when a token is given and its type is TokenKind.WHITESPACE, False otherwise
    """
    # bool() so that a missing token yields False instead of leaking None (or the falsy token) to the caller
    return bool(token) and token.type == TokenKind.WHITESPACE
|
||||
|
||||
|
||||
@dataclass()
|
||||
class LexerError(Exception):
|
||||
@@ -101,12 +121,13 @@ class Tokenizer:
|
||||
|
||||
KEYWORDS = set(x.value for x in Keywords)
|
||||
|
||||
def __init__(self, text, parse_word=False):
|
||||
def __init__(self, text, yield_eof=True, parse_word=False):
|
||||
self.text = text
|
||||
self.text_len = len(text)
|
||||
self.column = 1
|
||||
self.line = 1
|
||||
self.i = 0
|
||||
self.yield_eof = yield_eof
|
||||
self.parse_word = parse_word
|
||||
|
||||
def __iter__(self):
|
||||
@@ -134,6 +155,7 @@ class Tokenizer:
|
||||
self.i += 1
|
||||
self.column += 1
|
||||
elif c == "_":
|
||||
from core.concept import VARIABLE_PREFIX
|
||||
if self.i + 1 < self.text_len and self.text[self.i + 1].isalpha():
|
||||
identifier = self.eat_identifier(self.i)
|
||||
token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER
|
||||
@@ -141,6 +163,13 @@ class Tokenizer:
|
||||
yield Token(token_type, value, self.i, self.line, self.column)
|
||||
self.i += len(identifier)
|
||||
self.column += len(identifier)
|
||||
elif self.i + 7 < self.text_len and \
|
||||
self.text[self.i: self.i + 7] == VARIABLE_PREFIX and \
|
||||
self.text[self.i + 7].isdigit():
|
||||
number = self.eat_number(self.i + 7)
|
||||
yield Token(TokenKind.VAR_DEF, VARIABLE_PREFIX + number, self.i, self.line, self.column)
|
||||
self.i += 7 + len(number)
|
||||
self.column += 7 + len(number)
|
||||
else:
|
||||
yield Token(TokenKind.UNDERSCORE, "_", self.i, self.line, self.column)
|
||||
self.i += 1
|
||||
@@ -308,7 +337,8 @@ class Tokenizer:
|
||||
else:
|
||||
raise LexerError(f"Unknown token '{c}'", self.text, self.i, self.line, self.column)
|
||||
|
||||
yield Token(TokenKind.EOF, "", self.i, self.line, self.column)
|
||||
if self.yield_eof:
|
||||
yield Token(TokenKind.EOF, "", self.i, self.line, self.column)
|
||||
|
||||
def eat_concept(self, start, line, column):
|
||||
key, id, buffer = None, None, ""
|
||||
|
||||
@@ -91,7 +91,7 @@ class AtomConceptParserHelper:
|
||||
|
||||
self.debug.append(token)
|
||||
|
||||
if self.expected_tokens[0] != BaseNodeParser.get_token_value(token):
|
||||
if self.expected_tokens[0] != token.str_value:
|
||||
self.errors.append(UnexpectedTokenErrorNode(
|
||||
f"Found '{token}' while expecting '{self.expected_tokens[0]}'",
|
||||
token,
|
||||
@@ -119,7 +119,7 @@ class AtomConceptParserHelper:
|
||||
forked.eat_concept(concept, pos)
|
||||
|
||||
concept_node = ConceptNode(concept, pos, pos)
|
||||
expected = [BaseNodeParser.get_token_value(t) for t in Tokenizer(concept.name)][1:-1]
|
||||
expected = [t.str_value for t in Tokenizer(concept.name)][1:-1]
|
||||
|
||||
if not expected:
|
||||
# the concept is already matched
|
||||
|
||||
@@ -53,9 +53,6 @@ class UnrecognizedTokensNode(LexerNode):
|
||||
self.is_frozen = False
|
||||
self.parenthesis_count = 0
|
||||
|
||||
def has_open_paren(self):
|
||||
return self.parenthesis_count > 0
|
||||
|
||||
def add_token(self, token, pos):
|
||||
if self.is_frozen:
|
||||
raise Exception("The node is frozen")
|
||||
@@ -78,6 +75,21 @@ class UnrecognizedTokensNode(LexerNode):
|
||||
|
||||
return self
|
||||
|
||||
def pop(self, token_kind):
    """
    Remove the last token of the node, but only when it is of the requested kind.

    When the removed token was the only one, the node is reset;
    otherwise the end position is moved back by one.

    :param token_kind: kind that the last token must have in order to be removed
    :return: None
    :raises Exception: when the node is frozen
    """
    if self.is_frozen:
        raise Exception("The node is frozen")

    if not self.tokens or self.tokens[-1].type != token_kind:
        return  # nothing to remove

    self.tokens.pop()
    if self.tokens:
        self.end -= 1
    else:
        self.reset()
|
||||
|
||||
|
||||
def has_open_paren(self):
|
||||
return self.parenthesis_count > 0
|
||||
|
||||
def not_whitespace(self):
|
||||
return not self.is_whitespace()
|
||||
|
||||
@@ -90,6 +102,11 @@ class UnrecognizedTokensNode(LexerNode):
|
||||
def is_empty(self):
|
||||
return len(self.tokens) == 0
|
||||
|
||||
def last_token_type(self):
    """
    Give the type of the last token held by the node.

    :return: type of the last entry of self.tokens, or None when there is no token
    """
    return self.tokens[-1].type if self.tokens else None
|
||||
|
||||
def __eq__(self, other):
|
||||
if isinstance(other, utnode):
|
||||
return self.start == other.start and \
|
||||
@@ -676,15 +693,6 @@ class BaseNodeParser(BaseParser):
|
||||
|
||||
return custom_concepts if custom else None
|
||||
|
||||
@staticmethod
|
||||
def get_token_value(token):
|
||||
if token.type == TokenKind.STRING:
|
||||
return token.value[1:-1]
|
||||
elif token.type == TokenKind.KEYWORD:
|
||||
return token.value.value
|
||||
else:
|
||||
return token.value
|
||||
|
||||
@staticmethod
|
||||
def get_concepts_by_first_keyword(context, concepts, use_sheerka=False):
|
||||
"""
|
||||
|
||||
+193
-78
@@ -1,15 +1,16 @@
|
||||
from collections import namedtuple
|
||||
from dataclasses import dataclass, field
|
||||
from operator import attrgetter
|
||||
from typing import List
|
||||
|
||||
from core import builtin_helpers
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF
|
||||
from core.concept import Concept, DEFINITION_TYPE_BNF
|
||||
from core.sheerka.ExecutionContext import ExecutionContext
|
||||
from core.tokenizer import Token, TokenKind
|
||||
from core.tokenizer import Token, TokenKind, Tokenizer
|
||||
from parsers.BaseNodeParser import UnrecognizedTokensNode, ConceptNode, SourceCodeNode, SyaAssociativity, \
|
||||
SourceCodeWithConceptNode, BaseNodeParser
|
||||
from parsers.BaseParser import ErrorNode, UnexpectedTokenErrorNode
|
||||
from parsers.BaseParser import ErrorNode
|
||||
|
||||
PARSERS = ["BnfNode", "AtomNode", "Python"]
|
||||
|
||||
@@ -88,10 +89,13 @@ class SyaConceptParserHelper:
|
||||
concept: Concept
|
||||
start: int # position of the token in the tokenizer (Caution, it is not token.index)
|
||||
end: int = field(default=-1, repr=False, compare=False, hash=None)
|
||||
expected: List[str] = field(default_factory=list, repr=False, compare=False, hash=None)
|
||||
expected: List[Token] = field(default_factory=list, repr=False, compare=False, hash=None)
|
||||
expected_parameters_before_first_token: int = field(default=0, repr=False, compare=False, hash=None)
|
||||
last_token_before_first_token: Token = field(default=None, repr=False, compare=False, hash=None)
|
||||
potential_pos: int = field(default=-1, repr=False, compare=False, hash=None)
|
||||
parameters_list_at_init: list = field(default_factory=list, repr=False, compare=False, hash=None)
|
||||
tokens: List[Token] = field(default_factory=list, repr=False, compare=False, hash=None) # tokens eaten
|
||||
remember_whitespace: Token = field(default=None, repr=False, compare=False, hash=None)
|
||||
error: str = None
|
||||
|
||||
def __post_init__(self):
|
||||
@@ -99,17 +103,20 @@ class SyaConceptParserHelper:
|
||||
if self.end == -1:
|
||||
self.end = self.start
|
||||
|
||||
first_keyword_found = False
|
||||
for name in concept.key.split():
|
||||
if not name.startswith(VARIABLE_PREFIX) and not first_keyword_found:
|
||||
first_keyword_found = True
|
||||
first_keyword_found = None
|
||||
for token in Tokenizer(concept.key, yield_eof=False):
|
||||
if not first_keyword_found and token.type != TokenKind.WHITESPACE and token.type != TokenKind.VAR_DEF:
|
||||
first_keyword_found = token
|
||||
|
||||
if first_keyword_found:
|
||||
self.expected.append(name)
|
||||
self.expected.append(token)
|
||||
else:
|
||||
self.expected_parameters_before_first_token += 1
|
||||
self.last_token_before_first_token = token
|
||||
if token.type != TokenKind.WHITESPACE:
|
||||
self.expected_parameters_before_first_token += 1
|
||||
|
||||
self.eat_token() # remove the fist token
|
||||
self.eat_token(first_keyword_found) # remove the first token
|
||||
self.tokens.append(first_keyword_found)
|
||||
|
||||
def is_matched(self):
|
||||
return len(self.expected) == 0
|
||||
@@ -117,23 +124,38 @@ class SyaConceptParserHelper:
|
||||
def is_atom(self):
|
||||
return len(self.concept.concept.metadata.variables) == 0 and len(self.expected) == 0
|
||||
|
||||
def is_expected(self, token):
|
||||
if self.is_matched():
|
||||
def is_next(self, token):
|
||||
if self.is_matched() or len(self.expected) == 0:
|
||||
return False
|
||||
|
||||
token_value = BaseNodeParser.get_token_value(token)
|
||||
# True if the next token is the one that is expected
|
||||
# Or if the next token is a whitespace and the expected one is the one after
|
||||
# (whitespace are sometimes not mandatory)
|
||||
return token.str_value == self.expected[0].str_value or \
|
||||
self.expected[0].type == TokenKind.WHITESPACE and token.str_value == self.expected[1].str_value
|
||||
|
||||
def is_expected(self, token):
|
||||
if self.is_matched() or token.type == TokenKind.WHITESPACE:
|
||||
return False
|
||||
|
||||
for expected in self.expected:
|
||||
if not expected.startswith(VARIABLE_PREFIX) and expected == token_value:
|
||||
if expected.type != TokenKind.VAR_DEF and expected.str_value == token.str_value:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def expected_parameters(self):
|
||||
return sum(map(lambda e: e.startswith(VARIABLE_PREFIX), self.expected))
|
||||
return sum(map(lambda e: e.type == TokenKind.VAR_DEF, self.expected))
|
||||
|
||||
def eat_token(self):
|
||||
# No check, as it is used only after is_expected
|
||||
def eat_token(self, until_token):
|
||||
"""
|
||||
Eat the expected tokens up to and including 'until_token'
|
||||
:param until_token:
|
||||
:return:
|
||||
"""
|
||||
# No check, as it is used only after is_expected() or is_next()
|
||||
while self.expected[0].str_value != until_token.str_value:
|
||||
del self.expected[0]
|
||||
del self.expected[0]
|
||||
|
||||
# return True if a whole sequence of keywords is eaten
|
||||
@@ -143,7 +165,10 @@ class SyaConceptParserHelper:
|
||||
if len(self.expected) == 0:
|
||||
return True
|
||||
|
||||
return self.expected[0].startswith(VARIABLE_PREFIX)
|
||||
# also return True at the end of a name sequence
|
||||
# ... <var0> bar baz qux <var1>
|
||||
# return True after 'qux', to indicate all the parameters from <var0> must be processed
|
||||
return self.expected[0].type == TokenKind.VAR_DEF
|
||||
|
||||
def eat_parameter(self, parameter):
|
||||
if self.is_matched() and parameter == self:
|
||||
@@ -153,7 +178,7 @@ class SyaConceptParserHelper:
|
||||
self.error = "No more parameter expected"
|
||||
return
|
||||
|
||||
if not self.expected[0].startswith(VARIABLE_PREFIX):
|
||||
if self.expected[0].type != TokenKind.VAR_DEF:
|
||||
self.error = "Parameter was not expected"
|
||||
return
|
||||
|
||||
@@ -202,6 +227,7 @@ class InFixToPostFix:
|
||||
self.errors = [] # Not quite sure that I can handle more than one error
|
||||
|
||||
self.debug = []
|
||||
self.false_positives = [] # concepts that looks like known one, but not (for debug purpose)
|
||||
self.forked = [] # use to fork InFixToPostFix when multiple parsers recognize the unrecognized_tokens
|
||||
|
||||
def __repr__(self):
|
||||
@@ -245,7 +271,6 @@ class InFixToPostFix:
|
||||
Note that when we are parsing non recognized tokens,
|
||||
we consider that the parenthesis are part of the non recognized
|
||||
:param token:
|
||||
:param stack:
|
||||
:return:
|
||||
"""
|
||||
return isinstance(token, Token) and token.type == TokenKind.RPAR
|
||||
@@ -268,10 +293,10 @@ class InFixToPostFix:
|
||||
:return:
|
||||
"""
|
||||
if isinstance(item, SyaConceptParserHelper) and len(item.expected) > 0 and not item.error:
|
||||
if item.expected[0].startswith(VARIABLE_PREFIX):
|
||||
if item.expected[0].type == TokenKind.VAR_DEF:
|
||||
item.error = "Not enough suffix parameters"
|
||||
else:
|
||||
item.error = f"token '{item.expected[0]}' not found"
|
||||
item.error = f"token '{item.expected[0].str_value}' not found"
|
||||
|
||||
if isinstance(item, SyaConceptParserHelper) and item.potential_pos != -1:
|
||||
self.out.insert(item.potential_pos, item)
|
||||
@@ -328,6 +353,16 @@ class InFixToPostFix:
|
||||
).pseudo_fix_source()
|
||||
return source_code
|
||||
|
||||
def _transform_to_unrecognized(self, parser_helper):
|
||||
# an Unrecognized when sent to out too prematurely
|
||||
if len(self.out) > 0 and isinstance(self.out[-1], UnrecognizedTokensNode):
|
||||
self.unrecognized_tokens = self.out.pop()
|
||||
|
||||
if parser_helper.remember_whitespace:
|
||||
self.unrecognized_tokens.add_token(parser_helper.remember_whitespace, parser_helper.start - 1)
|
||||
for i, token in enumerate(parser_helper.tokens):
|
||||
self.unrecognized_tokens.add_token(token, parser_helper.start + i)
|
||||
|
||||
def get_errors(self):
|
||||
res = []
|
||||
res.extend(self.errors)
|
||||
@@ -343,28 +378,28 @@ class InFixToPostFix:
|
||||
|
||||
self.is_locked = False
|
||||
|
||||
def manage_parameters_when_new_concept(self, temp_concept_node):
|
||||
def manage_parameters_when_new_concept(self, parser_helper):
|
||||
"""
|
||||
When a new concept is created, we need to check what to do with the parameters
|
||||
that were queued
|
||||
:param temp_concept_node: new concept
|
||||
:param parser_helper: new concept
|
||||
:return:
|
||||
"""
|
||||
if len(self.parameters_list) < temp_concept_node.expected_parameters_before_first_token:
|
||||
if len(self.parameters_list) < parser_helper.expected_parameters_before_first_token:
|
||||
# The new concept expect some prefix parameters, but there's not enough
|
||||
temp_concept_node.error = "Not enough prefix parameters"
|
||||
parser_helper.error = "Not enough prefix parameters"
|
||||
return
|
||||
|
||||
if len(self.parameters_list) > temp_concept_node.expected_parameters_before_first_token:
|
||||
if len(self.parameters_list) > parser_helper.expected_parameters_before_first_token:
|
||||
# There are more parameters than needed by the new concept
|
||||
# The others are either
|
||||
# - parameters for the previous concept (if any)
|
||||
# - concepts on their own
|
||||
# - syntax error
|
||||
# In all the cases, the only thing that matter is to pop what is expected by the new concept
|
||||
for i in range(temp_concept_node.expected_parameters_before_first_token):
|
||||
for i in range(parser_helper.expected_parameters_before_first_token):
|
||||
self.parameters_list.pop()
|
||||
temp_concept_node.parameters_list_at_init.extend(self.parameters_list)
|
||||
parser_helper.parameters_list_at_init.extend(self.parameters_list)
|
||||
return
|
||||
|
||||
# len(self.parameters_list) == parser_helper.expected_parameters_before_first_token
|
||||
@@ -385,14 +420,18 @@ class InFixToPostFix:
|
||||
:return:
|
||||
"""
|
||||
|
||||
# manage parenthesis that didn't find any match
|
||||
if self._is_lpar(self.stack[-1]):
|
||||
self._add_error(ParenthesisMismatchErrorNode(self.stack[-1]))
|
||||
|
||||
# The parameter must be part of the current concept being parsed
|
||||
assert len(self._concepts()) != 0 # sanity check
|
||||
|
||||
current_concept = self._concepts()[-1]
|
||||
while len(current_concept.expected) > 0 and current_concept.expected[0].startswith(VARIABLE_PREFIX):
|
||||
while len(current_concept.expected) > 0 and current_concept.expected[0].type == TokenKind.VAR_DEF:
|
||||
# eat everything that was expected
|
||||
if len(self.parameters_list) == 0:
|
||||
# current_concept.error = f"Failed to match parameter '{current_concept.expected[0]}'"
|
||||
current_concept.error = f"Failed to match parameter '{current_concept.expected[0].str_value}'"
|
||||
return
|
||||
del self.parameters_list[0]
|
||||
del current_concept.expected[0]
|
||||
@@ -506,6 +545,11 @@ class InFixToPostFix:
|
||||
if stack.associativity == SyaAssociativity.No and current.associativity == SyaAssociativity.No:
|
||||
self._add_error(NoneAssociativeSequenceErrorNode(current.concept, stack_head.start, concept_node.start))
|
||||
|
||||
if not current.precedence:
|
||||
# precedence is not set (None or zero)
|
||||
# Do not apply any rule
|
||||
return False
|
||||
|
||||
if current.associativity == SyaAssociativity.Left and current.precedence <= stack.precedence:
|
||||
return True
|
||||
|
||||
@@ -528,9 +572,55 @@ class InFixToPostFix:
|
||||
:return:
|
||||
"""
|
||||
|
||||
def _pop_stack(c):
|
||||
while self.stack[-1] != c and not self._is_lpar(c):
|
||||
self.pop_stack_to_out()
|
||||
|
||||
if self._is_lpar(self.stack[-1]):
|
||||
self._add_error(ParenthesisMismatchErrorNode(self.stack[-1]))
|
||||
return False
|
||||
|
||||
# Manage concepts ending with long names
|
||||
if self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1].is_matched():
|
||||
self.pop_stack_to_out()
|
||||
|
||||
for current_concept in reversed(self._concepts()):
|
||||
# As I may lose memory again ;-)
|
||||
# it's a reversed loop to manage cases like
|
||||
# if a plus b then ...
|
||||
# The current concept is 'plus', but the token is 'then'
|
||||
# It means that I have finished parsing the 'plus' and started the second part of the 'if'
|
||||
|
||||
if current_concept.is_next(token):
|
||||
current_concept.end = pos
|
||||
current_concept.tokens.append(token)
|
||||
if current_concept.eat_token(token):
|
||||
_pop_stack(current_concept)
|
||||
return True
|
||||
|
||||
if len(current_concept.expected) > 0 and current_concept.expected[0].type != TokenKind.VAR_DEF:
|
||||
if current_concept.expected[0].type == TokenKind.WHITESPACE:
|
||||
# drop it. It's the case where an optional whitespace is missing
|
||||
del (current_concept.expected[0])
|
||||
else:
|
||||
# error
|
||||
# We are not parsing the concept we thought we were parsing.
|
||||
# Transform the eaten tokens into unrecognized
|
||||
# and discard the current SyaConceptParserHelper
|
||||
# TODO: manage the pending LPAR, RPAR ?
|
||||
self._transform_to_unrecognized(current_concept)
|
||||
self.false_positives.append(current_concept)
|
||||
self.stack.pop()
|
||||
return False
|
||||
|
||||
if current_concept.is_expected(token):
|
||||
|
||||
# Fix the whitespace between var and expected if needed
|
||||
# current_concept.expected[0] is '<var>'
|
||||
# current_concept.expected[1] is what separate var from expected (normally a whitespace)
|
||||
if current_concept.expected[1].type == TokenKind.WHITESPACE:
|
||||
self.unrecognized_tokens.pop(TokenKind.WHITESPACE)
|
||||
|
||||
current_concept.end = pos
|
||||
self.manage_unrecognized()
|
||||
# manage that some clones may have been forked
|
||||
@@ -550,36 +640,33 @@ class InFixToPostFix:
|
||||
self.parameters_list[:]))
|
||||
return True # no need to continue
|
||||
|
||||
while self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1].is_matched():
|
||||
self.pop_stack_to_out()
|
||||
while self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1] != current_concept:
|
||||
current = self.stack[-1]
|
||||
if current.error:
|
||||
self._transform_to_unrecognized(current)
|
||||
self.false_positives.append(current)
|
||||
self.stack.pop()
|
||||
|
||||
if current_concept.expected[1].type == TokenKind.WHITESPACE:
|
||||
self.unrecognized_tokens.pop(TokenKind.WHITESPACE)
|
||||
|
||||
self.manage_unrecognized()
|
||||
# manage that some clones may have been forked
|
||||
for forked in self.forked:
|
||||
forked.handle_expected_token(token, pos)
|
||||
else:
|
||||
self.pop_stack_to_out()
|
||||
self.manage_parameters()
|
||||
|
||||
if current_concept.eat_token():
|
||||
while self.stack[-1] != current_concept and not self._is_lpar(current_concept):
|
||||
self.pop_stack_to_out()
|
||||
# maybe eat whitespace that was between <var> and expected token
|
||||
if current_concept.expected[0].type == TokenKind.WHITESPACE:
|
||||
del current_concept.expected[0]
|
||||
|
||||
if self._is_lpar(self.stack[-1]):
|
||||
self._add_error(ParenthesisMismatchErrorNode(self.stack[-1]))
|
||||
return False
|
||||
|
||||
# Manage concepts ending with long names
|
||||
if self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1].is_matched():
|
||||
self.pop_stack_to_out()
|
||||
if current_concept.eat_token(token):
|
||||
_pop_stack(current_concept)
|
||||
|
||||
return True
|
||||
|
||||
# else:
|
||||
# if token.type != TokenKind.WHITESPACE:
|
||||
# # hack, because whitespaces are not correctly parsed in self.expected
|
||||
# # KSI 2020/04/25
|
||||
# # I no longer understand why we are in a loop (the reverse one)
|
||||
# # if we are parsing a concept and the expected token does not match
|
||||
# # The whole class should be in error
|
||||
# self._add_error(UnexpectedTokenErrorNode(
|
||||
# f"Failed to parse '{current_concept.concept.concept}'",
|
||||
# token, current_concept.expected))
|
||||
# return False
|
||||
|
||||
return False
|
||||
|
||||
def eat_token(self, token, pos):
|
||||
@@ -692,10 +779,11 @@ class InFixToPostFix:
|
||||
|
||||
return False
|
||||
|
||||
def eat_concept(self, sya_concept_def, pos):
|
||||
def eat_concept(self, sya_concept_def, token, pos):
|
||||
"""
|
||||
a concept is found
|
||||
:param sya_concept_def:
|
||||
:param token:
|
||||
:param pos:
|
||||
:return:
|
||||
"""
|
||||
@@ -704,37 +792,43 @@ class InFixToPostFix:
|
||||
return
|
||||
self.debug.append(sya_concept_def)
|
||||
|
||||
temp_concept_node = SyaConceptParserHelper(sya_concept_def, pos)
|
||||
parser_helper = SyaConceptParserHelper(sya_concept_def, pos)
|
||||
|
||||
if self.unrecognized_tokens.last_token_type() == TokenKind.WHITESPACE:
|
||||
parser_helper.remember_whitespace = self.unrecognized_tokens.tokens[-1]
|
||||
|
||||
if Token.is_whitespace(parser_helper.last_token_before_first_token):
|
||||
self.unrecognized_tokens.pop(TokenKind.WHITESPACE)
|
||||
|
||||
# First, try to recognize the tokens that are waiting
|
||||
self.manage_unrecognized()
|
||||
for forked in self.forked:
|
||||
# manage the fact that some clones may have been forked
|
||||
forked.eat_concept(sya_concept_def, pos)
|
||||
forked.eat_concept(sya_concept_def, token, pos)
|
||||
|
||||
# then, check if this new concept is linked to the previous ones
|
||||
# ie, is the previous concept fully matched ?
|
||||
if temp_concept_node.expected_parameters_before_first_token == 0:
|
||||
if parser_helper.expected_parameters_before_first_token == 0:
|
||||
# => does not expect pending parameter (it's suffixed concept)
|
||||
while self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1].potential_pos != -1:
|
||||
# => previous seems to have everything it needs in the parameter list
|
||||
self.pop_stack_to_out()
|
||||
|
||||
if temp_concept_node.is_atom():
|
||||
self._put_to_out(temp_concept_node.fix_concept())
|
||||
if parser_helper.is_atom():
|
||||
self._put_to_out(parser_helper.fix_concept())
|
||||
else:
|
||||
# call shunting yard algorithm
|
||||
while self.i_can_pop(temp_concept_node):
|
||||
while self.i_can_pop(parser_helper):
|
||||
self.pop_stack_to_out()
|
||||
|
||||
if temp_concept_node.is_matched():
|
||||
if parser_helper.is_matched():
|
||||
# case of a prefix concept which has found happiness with self.parameters_list
|
||||
# directly put it in out
|
||||
self.manage_parameters_when_new_concept(temp_concept_node)
|
||||
self._put_to_out(temp_concept_node.fix_concept())
|
||||
self.manage_parameters_when_new_concept(parser_helper)
|
||||
self._put_to_out(parser_helper.fix_concept())
|
||||
else:
|
||||
self.stack.append(temp_concept_node)
|
||||
self.manage_parameters_when_new_concept(temp_concept_node)
|
||||
self.stack.append(parser_helper)
|
||||
self.manage_parameters_when_new_concept(parser_helper)
|
||||
|
||||
def eat_unrecognized(self, token, pos):
|
||||
"""
|
||||
@@ -762,18 +856,34 @@ class InFixToPostFix:
|
||||
if len(self.stack) == 0 and len(self.out) == 0:
|
||||
return # no need to pop the buffer, as no concept is found
|
||||
|
||||
while len(self.stack) > 0:
|
||||
parser_helper = self.stack[-1]
|
||||
|
||||
# validate parenthesis
|
||||
if self._is_lpar(parser_helper) or self._is_rpar(parser_helper):
|
||||
self._add_error(ParenthesisMismatchErrorNode(parser_helper))
|
||||
return None
|
||||
|
||||
self.manage_unrecognized()
|
||||
for forked in self.forked:
|
||||
# manage that some clones may have been forked
|
||||
forked.finalize()
|
||||
|
||||
failed_to_match = sum(map(lambda e: e.type != TokenKind.VAR_DEF, parser_helper.expected))
|
||||
if failed_to_match > 0:
|
||||
# didn't manage to read all tokens.
|
||||
# Transform them into unrecognized
|
||||
self._transform_to_unrecognized(parser_helper)
|
||||
self.false_positives.append(parser_helper)
|
||||
self.stack.pop() # discard the parser helper
|
||||
else:
|
||||
self.pop_stack_to_out() # process it
|
||||
|
||||
self.manage_unrecognized()
|
||||
for forked in self.forked:
|
||||
# manage that some clones may have been forked
|
||||
forked.finalize()
|
||||
|
||||
while len(self.stack) > 0:
|
||||
if self._is_lpar(self.stack[-1]) or self._is_rpar(self.stack[-1]):
|
||||
self._add_error(ParenthesisMismatchErrorNode(self.stack[-1]))
|
||||
return None
|
||||
|
||||
self.pop_stack_to_out()
|
||||
|
||||
def clone(self):
|
||||
clone = InFixToPostFix(self.context)
|
||||
clone.is_locked = self.is_locked
|
||||
@@ -975,7 +1085,7 @@ class SyaNodeParser(BaseNodeParser):
|
||||
|
||||
try:
|
||||
if token.type in (TokenKind.LPAR, TokenKind.RPAR):
|
||||
# little optim, no need to get the concept when parenthesis
|
||||
# small optimization: no need to lock, unlock or get the concept for a parenthesis
|
||||
for infix_to_postfix in res:
|
||||
infix_to_postfix.eat_token(token, self.pos)
|
||||
continue
|
||||
@@ -992,7 +1102,7 @@ class SyaNodeParser(BaseNodeParser):
|
||||
|
||||
if len(concepts) == 1:
|
||||
for infix_to_postfix in res:
|
||||
infix_to_postfix.eat_concept(concepts[0], self.pos)
|
||||
infix_to_postfix.eat_concept(concepts[0], token, self.pos)
|
||||
continue
|
||||
|
||||
# make the cartesian product
|
||||
@@ -1001,7 +1111,7 @@ class SyaNodeParser(BaseNodeParser):
|
||||
for concept in concepts:
|
||||
clone = infix_to_postfix.clone()
|
||||
temp_res.append(clone)
|
||||
clone.eat_concept(concept, self.pos)
|
||||
clone.eat_concept(concept, token, self.pos)
|
||||
res = temp_res
|
||||
|
||||
finally:
|
||||
@@ -1100,6 +1210,11 @@ class SyaNodeParser(BaseNodeParser):
|
||||
to_insert = item
|
||||
sequence.insert(0, to_insert)
|
||||
|
||||
if has_unrecognized:
|
||||
# Handle pathological cases where a missing parenthesis messes up the order of the sequence
|
||||
# example "foo bar(one plus two"
|
||||
sequence.sort(key=attrgetter("start"))
|
||||
|
||||
ret.append(
|
||||
self.sheerka.ret(
|
||||
self.name,
|
||||
|
||||
@@ -1,912 +0,0 @@
|
||||
# #####################################################################################################
|
||||
# # This implementation of the parser is highly inspired by the arpeggio project (https://github.com/textX/Arpeggio)
|
||||
# # I don't directly use the project, but it helped me figure out
|
||||
# # what to do.
|
||||
# # Dejanović I., Milosavljević G., Vaderna R.:
|
||||
# # Arpeggio: A flexible PEG parser for Python,
|
||||
# # Knowledge-Based Systems, 2016, 95, 71 - 74, doi:10.1016/j.knosys.2015.12.004
|
||||
# #####################################################################################################
|
||||
# from collections import namedtuple
|
||||
# from dataclasses import dataclass
|
||||
# from collections import defaultdict
|
||||
# from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
|
||||
# from core.concept import Concept, ConceptParts, DoNotResolve
|
||||
# from core.tokenizer import TokenKind, Tokenizer, Token
|
||||
# from parsers.BaseNodeParser import LexerNode, GrammarErrorNode, ConceptNode, UnrecognizedTokensNode
|
||||
# from parsers.BaseParser import BaseParser, ErrorNode
|
||||
# import core.utils
|
||||
#
|
||||
#
|
||||
# class NonTerminalNode(LexerNode):
|
||||
# """
|
||||
# Returned by the BnfNodeParser
|
||||
# """
|
||||
#
|
||||
# def __init__(self, parsing_expression, start, end, tokens, children=None):
|
||||
# super().__init__(start, end, tokens)
|
||||
# self.parsing_expression = parsing_expression
|
||||
# self.children = children
|
||||
#
|
||||
# def __repr__(self):
|
||||
# name = self.parsing_expression.rule_name or self.parsing_expression.__class__.__name__
|
||||
# if len(self.children) > 0:
|
||||
# sub_names = "(" + ",".join([repr(child) for child in self.children]) + ")"
|
||||
# else:
|
||||
# sub_names = ""
|
||||
# return name + sub_names
|
||||
#
|
||||
# def __eq__(self, other):
|
||||
# if not isinstance(other, NonTerminalNode):
|
||||
# return False
|
||||
#
|
||||
# return self.parsing_expression == other.parsing_expression and \
|
||||
# self.start == other.start and \
|
||||
# self.end == other.end and \
|
||||
# self.children == other.children
|
||||
#
|
||||
# def __hash__(self):
|
||||
# return hash((self.parsing_expression, self.start, self.end, self.children))
|
||||
#
|
||||
#
|
||||
# class TerminalNode(LexerNode):
|
||||
# """
|
||||
# Returned by the BnfNodeParser
|
||||
# """
|
||||
#
|
||||
# def __init__(self, parsing_expression, start, end, value):
|
||||
# super().__init__(start, end, source=value)
|
||||
# self.parsing_expression = parsing_expression
|
||||
# self.value = value
|
||||
#
|
||||
# def __repr__(self):
|
||||
# name = self.parsing_expression.rule_name or ""
|
||||
# return name + f"'{self.value}'"
|
||||
#
|
||||
# def __eq__(self, other):
|
||||
# if not isinstance(other, TerminalNode):
|
||||
# return False
|
||||
#
|
||||
# return self.parsing_expression == other.parsing_expression and \
|
||||
# self.start == other.start and \
|
||||
# self.end == other.end and \
|
||||
# self.value == other.value
|
||||
#
|
||||
# def __hash__(self):
|
||||
# return hash((self.parsing_expression, self.start, self.end, self.value))
|
||||
#
|
||||
#
|
||||
# @dataclass()
|
||||
# class UnknownConceptNode(ErrorNode):
|
||||
# concept_key: str
|
||||
#
|
||||
#
|
||||
# @dataclass()
|
||||
# class TooManyConceptNode(ErrorNode):
|
||||
# concept_key: str
|
||||
#
|
||||
#
|
||||
# class ParsingExpression:
|
||||
# def __init__(self, *args, **kwargs):
|
||||
# self.elements = args
|
||||
#
|
||||
# nodes = kwargs.get('nodes', [])
|
||||
# if not hasattr(nodes, '__iter__'):
|
||||
# nodes = [nodes]
|
||||
# self.nodes = nodes
|
||||
#
|
||||
# self.rule_name = kwargs.get('rule_name', '')
|
||||
#
|
||||
# def __eq__(self, other):
|
||||
# if not isinstance(other, ParsingExpression):
|
||||
# return False
|
||||
#
|
||||
# return self.rule_name == other.rule_name and self.elements == other.elements
|
||||
#
|
||||
# def __hash__(self):
|
||||
# return hash((self.rule_name, self.elements))
|
||||
#
|
||||
# def parse(self, parser):
|
||||
# return self._parse(parser)
|
||||
#
|
||||
# def add_rule_name_if_needed(self, text):
|
||||
# return text + "=" + self.rule_name if self.rule_name else text
|
||||
#
|
||||
#
|
||||
# class ConceptExpression(ParsingExpression):
|
||||
# """
|
||||
# Will match a concept
|
||||
# It used only for rule definition
|
||||
#
|
||||
# When the grammar is created, it is replaced by the actual concept
|
||||
# """
|
||||
#
|
||||
# def __init__(self, concept, rule_name=""):
|
||||
# super().__init__(rule_name=rule_name)
|
||||
# self.concept = concept
|
||||
#
|
||||
# def __repr__(self):
|
||||
# return self.add_rule_name_if_needed(f"{self.concept}")
|
||||
#
|
||||
# def __eq__(self, other):
|
||||
# if not super().__eq__(other):
|
||||
# return False
|
||||
#
|
||||
# if not isinstance(other, ConceptExpression):
|
||||
# return False
|
||||
#
|
||||
# if isinstance(self.concept, Concept):
|
||||
# return self.concept.name == other.concept.name
|
||||
#
|
||||
# # when it's only the name of the concept
|
||||
# return self.concept == other.concept
|
||||
#
|
||||
# def __hash__(self):
|
||||
# return hash((self.concept, self.rule_name))
|
||||
#
|
||||
# @staticmethod
|
||||
# def get_parsing_expression_from_name(name):
|
||||
# tokens = Tokenizer(name)
|
||||
# nodes = [StrMatch(core.utils.strip_quotes(token.value)) for token in list(tokens)[:-1]]
|
||||
# if len(nodes) == 1:
|
||||
# return nodes[0]
|
||||
# else:
|
||||
# sequence = Sequence(nodes)
|
||||
# sequence.nodes = nodes
|
||||
# return sequence
|
||||
#
|
||||
# def _parse(self, parser):
|
||||
# to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept
|
||||
# if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
|
||||
# return None
|
||||
#
|
||||
# self.concept = to_match # Memoize
|
||||
#
|
||||
# if to_match not in parser.concepts_grammars:
|
||||
# # Try to match the concept using its name
|
||||
# expr = self.get_parsing_expression_from_name(to_match.name)
|
||||
# node = expr.parse(parser)
|
||||
# else:
|
||||
# node = parser.concepts_grammars[to_match].parse(parser)
|
||||
#
|
||||
# if node is None:
|
||||
# return None
|
||||
#
|
||||
# return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node])
|
||||
#
|
||||
#
|
||||
# class ConceptGroupExpression(ConceptExpression):
|
||||
# def _parse(self, parser):
|
||||
# to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept
|
||||
# if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
|
||||
# return None
|
||||
#
|
||||
# self.concept = to_match # Memoize
|
||||
#
|
||||
# if to_match not in parser.concepts_grammars:
|
||||
# concepts_in_group = parser.sheerka.get_set_elements(parser.context, self.concept)
|
||||
# nodes = [ConceptExpression(c, rule_name=c.name) for c in concepts_in_group]
|
||||
# expr = OrderedChoice(nodes)
|
||||
# expr.nodes = nodes
|
||||
# node = expr.parse(parser)
|
||||
# else:
|
||||
# node = parser.concepts_grammars[to_match].parse(parser)
|
||||
#
|
||||
# if node is None:
|
||||
# return None
|
||||
#
|
||||
# return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node])
|
||||
#
|
||||
#
|
||||
# class Sequence(ParsingExpression):
|
||||
# """
|
||||
# Will match sequence of parser expressions in exact order they are defined.
|
||||
# """
|
||||
#
|
||||
# def _parse(self, parser):
|
||||
# init_pos = parser.pos
|
||||
# end_pos = parser.pos
|
||||
#
|
||||
# children = []
|
||||
# for e in self.nodes:
|
||||
# node = e.parse(parser)
|
||||
# if node is None:
|
||||
# return None
|
||||
# else:
|
||||
# if node.end != -1: # because returns -1 when no match
|
||||
# children.append(node)
|
||||
# end_pos = node.end
|
||||
#
|
||||
# return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)
|
||||
#
|
||||
# def __repr__(self):
|
||||
# to_str = ", ".join(repr(n) for n in self.elements)
|
||||
# return self.add_rule_name_if_needed(f"({to_str})")
|
||||
#
|
||||
#
|
||||
# class OrderedChoice(ParsingExpression):
|
||||
# """
|
||||
# Will match one among multiple
|
||||
# It will stop at the first match (so the order of definition is important)
|
||||
# """
|
||||
#
|
||||
# def _parse(self, parser):
|
||||
# init_pos = parser.pos
|
||||
#
|
||||
# for e in self.nodes:
|
||||
# node = e.parse(parser)
|
||||
# if node:
|
||||
# return NonTerminalNode(self, init_pos, node.end, parser.tokens[init_pos: node.end + 1], [node])
|
||||
#
|
||||
# parser.seek(init_pos) # backtrack
|
||||
#
|
||||
# return None
|
||||
#
|
||||
# def __repr__(self):
|
||||
# to_str = "| ".join(repr(n) for n in self.elements)
|
||||
# return self.add_rule_name_if_needed(f"({to_str})")
|
||||
#
|
||||
#
|
||||
# class Optional(ParsingExpression):
|
||||
# """
|
||||
# Will match or not the elements
|
||||
# if many matches, will choose longest one
|
||||
# If you need order, use Optional(OrderedChoice)
|
||||
# """
|
||||
#
|
||||
# def _parse(self, parser):
|
||||
# init_pos = parser.pos
|
||||
# selected_node = NonTerminalNode(self, parser.pos, -1, [], []) # means that nothing is found
|
||||
#
|
||||
# for e in self.nodes:
|
||||
# node = e.parse(parser)
|
||||
# if node:
|
||||
# if node.end > selected_node.end:
|
||||
# selected_node = NonTerminalNode(
|
||||
# self,
|
||||
# node.start,
|
||||
# node.end,
|
||||
# parser.tokens[node.start: node.end + 1],
|
||||
# [node])
|
||||
#
|
||||
# parser.seek(init_pos) # backtrack
|
||||
#
|
||||
# if selected_node.end != -1:
|
||||
# parser.seek(selected_node.end)
|
||||
# parser.next_token() # eat the tokens found
|
||||
#
|
||||
# return selected_node
|
||||
#
|
||||
# def __repr__(self):
|
||||
# if len(self.elements) == 1:
|
||||
# return f"{self.elements[0]}?"
|
||||
# else:
|
||||
# to_str = ", ".join(repr(n) for n in self.elements)
|
||||
# return self.add_rule_name_if_needed(f"({to_str})?")
|
||||
#
|
||||
#
|
||||
# class Repetition(ParsingExpression):
|
||||
# """
|
||||
# Base class for all repetition-like parser expressions (?,*,+)
|
||||
# Args:
|
||||
# eolterm(bool): Flag that indicates that end of line should
|
||||
# terminate repetition match.
|
||||
# """
|
||||
#
|
||||
# def __init__(self, *elements, **kwargs):
|
||||
# super(Repetition, self).__init__(*elements, **kwargs)
|
||||
# self.sep = kwargs.get('sep', None)
|
||||
#
|
||||
#
|
||||
# class ZeroOrMore(Repetition):
|
||||
# """
|
||||
# ZeroOrMore will try to match parser expression specified zero or more
|
||||
# times. It will never fail.
|
||||
# """
|
||||
#
|
||||
# def _parse(self, parser):
|
||||
# init_pos = parser.pos
|
||||
# end_pos = -1
|
||||
# children = []
|
||||
#
|
||||
# while True:
|
||||
# current_pos = parser.pos
|
||||
#
|
||||
# # maybe eat the separator if needed
|
||||
# if self.sep and children:
|
||||
# sep_result = self.sep.parse(parser)
|
||||
# if sep_result is None:
|
||||
# parser.seek(current_pos)
|
||||
# break
|
||||
#
|
||||
# # eat the ZeroOrMore
|
||||
# node = self.nodes[0].parse(parser)
|
||||
# if node is None:
|
||||
# parser.seek(current_pos)
|
||||
# break
|
||||
# else:
|
||||
# if node.end != -1: # because returns -1 when no match
|
||||
# children.append(node)
|
||||
# end_pos = node.end
|
||||
#
|
||||
# if len(children) == 0:
|
||||
# return NonTerminalNode(self, init_pos, -1, [], [])
|
||||
#
|
||||
# return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)
|
||||
#
|
||||
# def __repr__(self):
|
||||
# to_str = ", ".join(repr(n) for n in self.elements)
|
||||
# return self.add_rule_name_if_needed(f"({to_str})*")
|
||||
#
|
||||
#
|
||||
# class OneOrMore(Repetition):
|
||||
# """
|
||||
# OneOrMore will try to match parser expression specified one or more times.
|
||||
# """
|
||||
#
|
||||
# def _parse(self, parser):
|
||||
# init_pos = parser.pos
|
||||
# end_pos = -1
|
||||
# children = []
|
||||
#
|
||||
# while True:
|
||||
# current_pos = parser.pos
|
||||
#
|
||||
# # maybe eat the separator if needed
|
||||
# if self.sep and children:
|
||||
# sep_result = self.sep.parse(parser)
|
||||
# if sep_result is None:
|
||||
# parser.seek(current_pos)
|
||||
# break
|
||||
#
|
||||
# # eat the OneOrMore
|
||||
# node = self.nodes[0].parse(parser)
|
||||
# if node is None:
|
||||
# parser.seek(current_pos)
|
||||
# break
|
||||
# else:
|
||||
# if node.end != -1: # because returns -1 when no match
|
||||
# children.append(node)
|
||||
# end_pos = node.end
|
||||
#
|
||||
# if len(children) == 0: # if nothing is found, it's an error
|
||||
# return None
|
||||
#
|
||||
# return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)
|
||||
#
|
||||
# def __repr__(self):
|
||||
# to_str = ", ".join(repr(n) for n in self.elements)
|
||||
# return self.add_rule_name_if_needed(f"({to_str})+")
|
||||
#
|
||||
#
|
||||
# class UnorderedGroup(Repetition):
|
||||
# """
|
||||
# Will try to match all of the parsing expression in any order.
|
||||
# """
|
||||
#
|
||||
# def _parse(self, parser):
|
||||
# raise NotImplementedError()
|
||||
#
|
||||
# # def __repr__(self):
|
||||
# # to_str = ", ".join(repr(n) for n in self.elements)
|
||||
# # return f"({to_str})#"
|
||||
#
|
||||
#
|
||||
# class Match(ParsingExpression):
|
||||
# """
|
||||
# Base class for all classes that will try to match something from the input.
|
||||
# """
|
||||
#
|
||||
# def __init__(self, rule_name, root=False):
|
||||
# super(Match, self).__init__(rule_name=rule_name, root=root)
|
||||
#
|
||||
# def parse(self, parser):
|
||||
# result = self._parse(parser)
|
||||
# return result
|
||||
#
|
||||
#
|
||||
# class StrMatch(Match):
|
||||
# """
|
||||
# Matches a literal
|
||||
# """
|
||||
#
|
||||
# def __init__(self, to_match, rule_name="", ignore_case=True):
|
||||
# super(Match, self).__init__(rule_name=rule_name)
|
||||
# self.to_match = to_match
|
||||
# self.ignore_case = ignore_case
|
||||
#
|
||||
# def __repr__(self):
|
||||
# return self.add_rule_name_if_needed(f"'{self.to_match}'")
|
||||
#
|
||||
# def __eq__(self, other):
|
||||
# if not super().__eq__(other):
|
||||
# return False
|
||||
#
|
||||
# if not isinstance(other, StrMatch):
|
||||
# return False
|
||||
#
|
||||
# return self.to_match == other.to_match and self.ignore_case == other.ignore_case
|
||||
#
|
||||
# def _parse(self, parser):
|
||||
# token = parser.get_token()
|
||||
# m = str(token.value).lower() == self.to_match.lower() if self.ignore_case \
|
||||
# else token.value == self.to_match
|
||||
#
|
||||
# if m:
|
||||
# node = TerminalNode(self, parser.pos, parser.pos, token.value)
|
||||
# parser.next_token()
|
||||
# return node
|
||||
#
|
||||
# return None
|
||||
#
|
||||
#
|
||||
# class BnfNodeParser(BaseParser):
|
||||
# def __init__(self, **kwargs):
|
||||
# super().__init__("BnfNode_old", 50)
|
||||
# self.enabled = False
|
||||
# if 'grammars' in kwargs:
|
||||
# self.concepts_grammars = kwargs.get("grammars")
|
||||
# elif 'sheerka' in kwargs:
|
||||
# self.concepts_grammars = kwargs.get("sheerka").concepts_grammars
|
||||
# else:
|
||||
# self.concepts_grammars = {}
|
||||
#
|
||||
# self.ignore_case = True
|
||||
#
|
||||
# self.token = None
|
||||
# self.pos = -1
|
||||
# self.tokens = None
|
||||
#
|
||||
# self.context = None
|
||||
# self.text = None
|
||||
# self.sheerka = None
|
||||
#
|
||||
# def add_error(self, error, next_token=True):
|
||||
# self.error_sink.append(error)
|
||||
# if next_token:
|
||||
# self.next_token()
|
||||
# return error
|
||||
#
|
||||
# def reset_parser(self, context, text):
|
||||
# self.context = context
|
||||
# self.sheerka = context.sheerka
|
||||
# self.text = text
|
||||
#
|
||||
# try:
|
||||
# self.tokens = list(self.get_input_as_tokens(text))
|
||||
# except core.tokenizer.LexerError as e:
|
||||
# self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
|
||||
# return False
|
||||
#
|
||||
# self.token = None
|
||||
# self.pos = -1
|
||||
# self.next_token(False)
|
||||
# return True
|
||||
#
|
||||
# def get_token(self) -> Token:
|
||||
# return self.token
|
||||
#
|
||||
# def next_token(self, skip_whitespace=True):
|
||||
# if self.token and self.token.type == TokenKind.EOF:
|
||||
# return False
|
||||
#
|
||||
# self.pos += 1
|
||||
# self.token = self.tokens[self.pos]
|
||||
#
|
||||
# if skip_whitespace:
|
||||
# while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
|
||||
# self.pos += 1
|
||||
# self.token = self.tokens[self.pos]
|
||||
#
|
||||
# return self.token.type != TokenKind.EOF
|
||||
#
|
||||
# def seek(self, pos):
|
||||
# self.pos = pos
|
||||
# self.token = self.tokens[self.pos]
|
||||
# return True
|
||||
#
|
||||
# def rewind(self, offset, skip_whitespace=True):
|
||||
# self.pos += offset
|
||||
# self.token = self.tokens[self.pos]
|
||||
#
|
||||
# if skip_whitespace:
|
||||
# while self.pos > 0 and (self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE):
|
||||
# self.pos -= 1
|
||||
# self.token = self.tokens[self.pos]
|
||||
#
|
||||
# def initialize(self, context, concepts_definitions):
|
||||
# """
|
||||
# Adds a bunch of concepts, and how they can be recognized
|
||||
# :param context: execution context
|
||||
# :param concepts_definitions: dictionary of concept, concept_definition
|
||||
# :return:
|
||||
# """
|
||||
#
|
||||
# self.context = context
|
||||
# self.sheerka = context.sheerka
|
||||
# concepts_to_resolve = set()
|
||||
#
|
||||
# for concept, concept_def in concepts_definitions.items():
|
||||
# # ## Gets the grammars
|
||||
# context.log(f"Resolving grammar for '{concept}'", context.who)
|
||||
# concept.init_key() # make sure that the key is initialized
|
||||
# grammar = self.get_model(concept_def, concepts_to_resolve)
|
||||
# self.concepts_grammars[concept] = grammar
|
||||
#
|
||||
# if self.has_error:
|
||||
# return self.sheerka.ret(self.name, False, self.error_sink)
|
||||
#
|
||||
# # ## Removes concepts with infinite recursions
|
||||
# concepts_to_remove = self.detect_infinite_recursion(concepts_to_resolve)
|
||||
# for concept in concepts_to_remove:
|
||||
# concepts_to_resolve.remove(concept)
|
||||
# del self.concepts_grammars[concept]
|
||||
#
|
||||
# if self.has_error:
|
||||
# return self.sheerka.ret(self.name, False, self.error_sink)
|
||||
# else:
|
||||
# return self.sheerka.ret(self.name, True, self.concepts_grammars)
|
||||
#
|
||||
# def get_concept(self, concept_name):
|
||||
# if concept_name in self.context.concepts:
|
||||
# return self.context.concepts[concept_name]
|
||||
# return self.sheerka.get_by_key(concept_name)
|
||||
#
|
||||
# def get_model(self, concept_def, concepts_to_resolve):
|
||||
#
|
||||
# # TODO
|
||||
# # inner_get_model must not modify the initial ParsingExpression
|
||||
# # A copy must be created
|
||||
# def inner_get_model(expression):
|
||||
# if isinstance(expression, Concept):
|
||||
# if self.sheerka.isaset(self.context, expression):
|
||||
# ret = ConceptGroupExpression(expression, rule_name=expression.name)
|
||||
# else:
|
||||
# ret = ConceptExpression(expression, rule_name=expression.name)
|
||||
# concepts_to_resolve.add(expression)
|
||||
# elif isinstance(expression, ConceptExpression): # it includes ConceptGroupExpression
|
||||
# if expression.rule_name is None or expression.rule_name == "":
|
||||
# expression.rule_name = expression.concept.name if isinstance(expression.concept, Concept) \
|
||||
# else expression.concept
|
||||
# if isinstance(expression.concept, str):
|
||||
# concept = self.get_concept(expression.concept)
|
||||
# if self.sheerka.is_known(concept):
|
||||
# expression.concept = concept
|
||||
# concepts_to_resolve.add(expression.concept)
|
||||
# ret = expression
|
||||
# elif isinstance(expression, str):
|
||||
# ret = StrMatch(expression, ignore_case=self.ignore_case)
|
||||
# elif isinstance(expression, StrMatch):
|
||||
# ret = expression
|
||||
# if ret.ignore_case is None:
|
||||
# ret.ignore_case = self.ignore_case
|
||||
# elif isinstance(expression, Sequence) or \
|
||||
# isinstance(expression, OrderedChoice) or \
|
||||
# isinstance(expression, ZeroOrMore) or \
|
||||
# isinstance(expression, OneOrMore) or \
|
||||
# isinstance(expression, Optional):
|
||||
# ret = expression
|
||||
# ret.nodes = [inner_get_model(e) for e in ret.elements]
|
||||
# else:
|
||||
# ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False)
|
||||
#
|
||||
# # Translate separator expression.
|
||||
# if isinstance(expression, Repetition) and expression.sep:
|
||||
# expression.sep = inner_get_model(expression.sep)
|
||||
#
|
||||
# return ret
|
||||
#
|
||||
# model = inner_get_model(concept_def)
|
||||
#
|
||||
# return model
|
||||
#
|
||||
# def detect_infinite_recursion(self, concepts_to_resolve):
|
||||
#
|
||||
# # infinite recursion matcher
|
||||
# def _is_infinite_recursion(ref_concept, node):
|
||||
# if isinstance(node, ConceptExpression):
|
||||
# if node.concept == ref_concept:
|
||||
# return True
|
||||
#
|
||||
# if isinstance(node.concept, str):
|
||||
# to_match = self.get_concept(node.concept)
|
||||
# if self.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
|
||||
# return False
|
||||
# else:
|
||||
# to_match = node.concept
|
||||
#
|
||||
# if to_match not in self.concepts_grammars:
|
||||
# return False
|
||||
#
|
||||
# return _is_infinite_recursion(ref_concept, self.concepts_grammars[to_match])
|
||||
#
|
||||
# if isinstance(node, OrderedChoice):
|
||||
# return _is_infinite_recursion(ref_concept, node.nodes[0])
|
||||
#
|
||||
# if isinstance(node, Sequence):
|
||||
# for node in node.nodes:
|
||||
# if _is_infinite_recursion(ref_concept, node):
|
||||
# return True
|
||||
# return False
|
||||
#
|
||||
# return False
|
||||
#
|
||||
# removed_concepts = []
|
||||
# for e in concepts_to_resolve:
|
||||
# if isinstance(e, str):
|
||||
# e = self.get_concept(e)
|
||||
# if self.sheerka.isinstance(e, BuiltinConcepts.UNKNOWN_CONCEPT):
|
||||
# continue
|
||||
#
|
||||
# if e not in self.concepts_grammars:
|
||||
# continue
|
||||
#
|
||||
# to_resolve = self.concepts_grammars[e]
|
||||
# if _is_infinite_recursion(e, to_resolve):
|
||||
# removed_concepts.append(e)
|
||||
# return removed_concepts
|
||||
#
|
||||
# def parse(self, context, parser_input):
|
||||
# if parser_input == "":
|
||||
# return context.sheerka.ret(
|
||||
# self.name,
|
||||
# False,
|
||||
# context.sheerka.new(BuiltinConcepts.IS_EMPTY)
|
||||
# )
|
||||
#
|
||||
# if not self.reset_parser(context, parser_input):
|
||||
# return self.sheerka.ret(
|
||||
# self.name,
|
||||
# False,
|
||||
# context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
|
||||
#
|
||||
# concepts_found = [[]]
|
||||
# unrecognized_tokens = None
|
||||
# has_unrecognized = False
|
||||
#
|
||||
# # actually list of list
|
||||
# # The first dimension is the number of possibilities found
|
||||
# # The second dimension is the number of concepts found, under one possibility
|
||||
# #
|
||||
# # Example 1
|
||||
# # concept foo : 'one' 'two'
|
||||
# # concept bar : 'one' 'two'
|
||||
# # input 'one two' -> will produce two possibilities (foo and bar).
|
||||
# #
|
||||
# # Example 2
|
||||
# # concept foo : 'one'
|
||||
# # concept bar : 'two'
|
||||
# # input 'one two' -> will produce one possibility which is (foo, bar) (foo then bar)
|
||||
#
|
||||
# while True:
|
||||
# init_pos = self.pos
|
||||
# res = []
|
||||
#
|
||||
# for concept, grammar in self.concepts_grammars.items():
|
||||
# self.seek(init_pos)
|
||||
# node = grammar.parse(self) # a node is TerminalNode or NonTerminalNode
|
||||
# if node is not None and node.end != -1:
|
||||
# updated_concept = self.finalize_concept(context.sheerka, concept, node)
|
||||
# concept_node = ConceptNode(
|
||||
# updated_concept,
|
||||
# node.start,
|
||||
# node.end,
|
||||
# self.tokens[node.start: node.end + 1],
|
||||
# None,
|
||||
# node)
|
||||
# res.append(concept_node)
|
||||
#
|
||||
# if len(res) == 0: # not recognized
|
||||
# self.seek(init_pos)
|
||||
# if unrecognized_tokens:
|
||||
# unrecognized_tokens.add_token(self.get_token(), init_pos)
|
||||
# else:
|
||||
# unrecognized_tokens = UnrecognizedTokensNode(init_pos, init_pos, [self.get_token()])
|
||||
#
|
||||
# if not self.next_token(False):
|
||||
# break
|
||||
#
|
||||
# else: # some concepts are recognized
|
||||
# if unrecognized_tokens and unrecognized_tokens.not_whitespace():
|
||||
# unrecognized_tokens.fix_source()
|
||||
# concepts_found = core.utils.product(concepts_found, [unrecognized_tokens])
|
||||
# has_unrecognized = True
|
||||
# unrecognized_tokens = None
|
||||
#
|
||||
# res = self.get_bests(res) # only keep the concepts that eat the most tokens
|
||||
# concepts_found = core.utils.product(concepts_found, res)
|
||||
#
|
||||
# # loop
|
||||
# self.seek(res[0].end)
|
||||
# if not self.next_token(False):
|
||||
# break
|
||||
#
|
||||
# # Fix the source for unrecognized tokens
|
||||
# if unrecognized_tokens and unrecognized_tokens.not_whitespace():
|
||||
# unrecognized_tokens.fix_source()
|
||||
# concepts_found = core.utils.product(concepts_found, [unrecognized_tokens])
|
||||
# has_unrecognized = True
|
||||
#
|
||||
# # else
|
||||
# # returns as many ReturnValues as there are choices found
|
||||
# ret = []
|
||||
# for choice in concepts_found:
|
||||
# ret.append(
|
||||
# self.sheerka.ret(
|
||||
# self.name,
|
||||
# not has_unrecognized,
|
||||
# self.sheerka.new(
|
||||
# BuiltinConcepts.PARSER_RESULT,
|
||||
# parser=self,
|
||||
# source=parser_input,
|
||||
# body=choice,
|
||||
# try_parsed=choice)))
|
||||
#
|
||||
# if len(ret) == 1:
|
||||
# self.log_result(context, parser_input, ret[0])
|
||||
# return ret[0]
|
||||
# else:
|
||||
# self.log_multiple_results(context, parser_input, ret)
|
||||
# return ret
|
||||
#
|
||||
# def finalize_concept(self, sheerka, template, underlying, init_empty_body=True):
|
||||
# """
|
||||
# Updates the properties of the concept
|
||||
# Goes in recursion if the property is a concept
|
||||
# """
|
||||
#
|
||||
# # this cache is to make sure that we return the same concept for the same ConceptExpression
|
||||
# _underlying_value_cache = {}
|
||||
#
|
||||
# def _add_prop(_concept, prop_name, value):
|
||||
# """
|
||||
# Adds a new entry,
|
||||
# makes a list if the property already exists
|
||||
# """
|
||||
# if prop_name not in _concept.compiled or _concept.compiled[prop_name] is None:
|
||||
# # new entry
|
||||
# _concept.compiled[prop_name] = value
|
||||
# else:
|
||||
# # make a list if there was a value
|
||||
# previous_value = _concept.compiled[prop_name]
|
||||
# if isinstance(previous_value, list):
|
||||
# previous_value.append(value)
|
||||
# else:
|
||||
# new_value = [previous_value, value]
|
||||
# _concept.compiled[prop_name] = new_value
|
||||
#
|
||||
# def _look_for_concept_match(_underlying):
|
||||
# """
|
||||
# At some point, there is either an StrMatch or a ConceptMatch,
|
||||
# that allowed the recognition.
|
||||
# Look for the ConceptMatch, with recursion if needed
|
||||
# """
|
||||
# if isinstance(_underlying.parsing_expression, ConceptExpression):
|
||||
# return _underlying
|
||||
#
|
||||
# if not isinstance(_underlying, NonTerminalNode):
|
||||
# return None
|
||||
#
|
||||
# if len(_underlying.children) != 1:
|
||||
# return None
|
||||
#
|
||||
# return _look_for_concept_match(_underlying.children[0])
|
||||
#
|
||||
# def _get_underlying_value(_underlying):
|
||||
# concept_match_node = _look_for_concept_match(_underlying)
|
||||
# if concept_match_node:
|
||||
# # the value is a concept
|
||||
# if id(concept_match_node) in _underlying_value_cache:
|
||||
# result = _underlying_value_cache[id(concept_match_node)]
|
||||
# else:
|
||||
# ref_tpl = concept_match_node.parsing_expression.concept
|
||||
# result = self.finalize_concept(sheerka, ref_tpl, concept_match_node.children[0], init_empty_body)
|
||||
# _underlying_value_cache[id(concept_match_node)] = result
|
||||
# else:
|
||||
# # the value is a string
|
||||
# result = DoNotResolve(_underlying.source)
|
||||
#
|
||||
# return result
|
||||
#
|
||||
# def _process_rule_name(_concept, _underlying):
|
||||
# if _underlying.parsing_expression.rule_name:
|
||||
# value = _get_underlying_value(_underlying)
|
||||
# _add_prop(_concept, _underlying.parsing_expression.rule_name, value)
|
||||
# _concept.metadata.need_validation = True
|
||||
#
|
||||
# if isinstance(_underlying, NonTerminalNode):
|
||||
# for child in _underlying.children:
|
||||
# _process_rule_name(_concept, child)
|
||||
#
|
||||
# key = (template.key, template.id) if template.id else template.key
|
||||
# concept = sheerka.new(key)
|
||||
# if init_empty_body and concept.metadata.body is None:
|
||||
# value = _get_underlying_value(underlying)
|
||||
# concept.compiled[ConceptParts.BODY] = value
|
||||
# if underlying.parsing_expression.rule_name:
|
||||
# _add_prop(concept, underlying.parsing_expression.rule_name, value)
|
||||
# # KSI : Why don't we set concept.metadata.need_validation to True ?
|
||||
#
|
||||
# if isinstance(underlying, NonTerminalNode):
|
||||
# for node in underlying.children:
|
||||
# _process_rule_name(concept, node)
|
||||
#
|
||||
# return concept
|
||||
#
|
||||
# def encode_grammar(self, grammar):
|
||||
# """
|
||||
#         Transform the grammar into something that can easily be serialized
|
||||
# :param grammar:
|
||||
# :return:
|
||||
# """
|
||||
#
|
||||
# def _encode(expression):
|
||||
# if isinstance(expression, StrMatch):
|
||||
# res = f"'{expression.to_match}'"
|
||||
#
|
||||
# elif isinstance(expression, ConceptExpression):
|
||||
# res = core.utils.str_concept(expression.concept)
|
||||
#
|
||||
# elif isinstance(expression, Sequence):
|
||||
# res = "(" + " ".join(_encode(c) for c in expression.nodes) + ")"
|
||||
#
|
||||
# elif isinstance(expression, OrderedChoice):
|
||||
# res = "(" + "|".join(_encode(c) for c in expression.nodes) + ")"
|
||||
#
|
||||
# elif isinstance(expression, Optional):
|
||||
# res = _encode(expression.nodes[0]) + "?"
|
||||
#
|
||||
# elif isinstance(expression, ZeroOrMore):
|
||||
# res = _encode(expression.nodes[0]) + "*"
|
||||
#
|
||||
# elif isinstance(expression, OneOrMore):
|
||||
# res = _encode(expression.nodes[0]) + "+"
|
||||
#
|
||||
# if expression.rule_name:
|
||||
# res += "=" + expression.rule_name
|
||||
#
|
||||
# return res
|
||||
#
|
||||
# result = {}
|
||||
# for k, v in grammar.items():
|
||||
# key = core.utils.str_concept(k)
|
||||
# value = _encode(v)
|
||||
# result[key] = value
|
||||
# return result
|
||||
#
|
||||
# @staticmethod
|
||||
# def get_bests(results):
|
||||
# """
|
||||
# Returns the result that is the longest
|
||||
# :param results:
|
||||
# :return:
|
||||
# """
|
||||
# by_end_pos = defaultdict(list)
|
||||
# for result in results:
|
||||
# by_end_pos[result.end].append(result)
|
||||
#
|
||||
# return by_end_pos[max(by_end_pos)]
|
||||
#
|
||||
#
|
||||
# class ParsingExpressionVisitor:
|
||||
# """
|
||||
# visit ParsingExpression
|
||||
# """
|
||||
#
|
||||
# def visit(self, parsing_expression):
|
||||
# name = parsing_expression.__class__.__name__
|
||||
#
|
||||
# method = 'visit_' + name
|
||||
# visitor = getattr(self, method, self.generic_visit)
|
||||
# return visitor(parsing_expression)
|
||||
#
|
||||
# def generic_visit(self, parsing_expression):
|
||||
# if hasattr(self, "visit_all"):
|
||||
# self.visit_all(parsing_expression)
|
||||
#
|
||||
# for node in parsing_expression.elements:
|
||||
# if isinstance(node, Concept):
|
||||
# self.visit(ConceptExpression(node.key or node.name))
|
||||
# elif isinstance(node, str):
|
||||
# self.visit(StrMatch(node))
|
||||
# else:
|
||||
# self.visit(node)
|
||||
@@ -1,108 +0,0 @@
|
||||
# # try to match something like
|
||||
# # ConceptNode 'plus' ConceptNode
|
||||
# #
|
||||
# # Replaced by SyaNodeParser
|
||||
# from core.builtin_concepts import BuiltinConcepts
|
||||
# from core.tokenizer import TokenKind, Token
|
||||
# from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode
|
||||
# from parsers.BaseParser import BaseParser
|
||||
# from parsers.MultipleConceptsParser import MultipleConceptsParser
|
||||
# from core.concept import VARIABLE_PREFIX
|
||||
#
|
||||
# multiple_concepts_parser = MultipleConceptsParser()
|
||||
#
|
||||
#
|
||||
# class ConceptsWithConceptsParser(BaseParser):
|
||||
# def __init__(self, **kwargs):
|
||||
# super().__init__("ConceptsWithConcepts", 25)
|
||||
# self.enabled = False
|
||||
#
|
||||
# @staticmethod
|
||||
# def get_tokens(nodes):
|
||||
# tokens = []
|
||||
#
|
||||
# for node in nodes:
|
||||
# if isinstance(node, ConceptNode):
|
||||
# index, line, column = node.tokens[0].index, node.tokens[0].line, node.tokens[0].column
|
||||
# tokens.append(Token(TokenKind.CONCEPT, node.concept, index, line, column))
|
||||
# else:
|
||||
# for token in node.tokens:
|
||||
# if token.type == TokenKind.EOF:
|
||||
# break
|
||||
# elif token.type in (TokenKind.NEWLINE, TokenKind.WHITESPACE):
|
||||
# continue
|
||||
# else:
|
||||
# tokens.append(token)
|
||||
#
|
||||
# return tokens
|
||||
#
|
||||
# @staticmethod
|
||||
# def get_key(nodes):
|
||||
# key = ""
|
||||
# index = 0
|
||||
# for node in nodes:
|
||||
# if key:
|
||||
# key += " "
|
||||
#
|
||||
# if isinstance(node, UnrecognizedTokensNode):
|
||||
# key += node.source.strip()
|
||||
# else:
|
||||
# key += f"{VARIABLE_PREFIX}{index}"
|
||||
# index += 1
|
||||
#
|
||||
# return key
|
||||
#
|
||||
# def finalize_concept(self, context, concept, nodes):
|
||||
# index = 0
|
||||
# for node in nodes:
|
||||
#
|
||||
# if isinstance(node, ConceptNode):
|
||||
# prop_name = list(concept.props.keys())[index]
|
||||
# concept.compiled[prop_name] = node.concept
|
||||
# context.log(
|
||||
# f"Setting property '{prop_name}='{node.concept}'.",
|
||||
# self.name)
|
||||
# index += 1
|
||||
# elif isinstance(node, SourceCodeNode):
|
||||
# prop_name = list(concept.props.keys())[index]
|
||||
# sheerka = context.sheerka
|
||||
# value = sheerka.new(BuiltinConcepts.PARSER_RESULT, parser=self, source=node.source, body=node.node)
|
||||
# concept.compiled[prop_name] = [context.sheerka.ret(self.name, True, value)]
|
||||
# context.log(
|
||||
# f"Setting property '{prop_name}'='Python({node.source})'.",
|
||||
# self.name)
|
||||
# index += 1
|
||||
#
|
||||
# return concept
|
||||
#
|
||||
# def parse(self, context, parser_input):
|
||||
# sheerka = context.sheerka
|
||||
# nodes = self.get_input_as_lexer_nodes(parser_input, multiple_concepts_parser)
|
||||
# if not nodes:
|
||||
# return None
|
||||
#
|
||||
# concept_key = self.get_key(nodes)
|
||||
# concept = sheerka.new(concept_key)
|
||||
# if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
|
||||
# return sheerka.ret(
|
||||
# self.name,
|
||||
# False,
|
||||
# sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.body))
|
||||
#
|
||||
# concepts = concept if hasattr(concept, "__iter__") else [concept]
|
||||
# for concept in concepts:
|
||||
# self.finalize_concept(context, concept, nodes)
|
||||
#
|
||||
# res = []
|
||||
# for concept in concepts:
|
||||
# res.append(sheerka.ret(
|
||||
# self.name,
|
||||
# True,
|
||||
# sheerka.new(
|
||||
# BuiltinConcepts.PARSER_RESULT,
|
||||
# parser=self,
|
||||
# source=parser_input.source,
|
||||
# body=concept,
|
||||
# try_parsed=None)))
|
||||
#
|
||||
# return res[0] if len(res) == 1 else res
|
||||
@@ -1,163 +0,0 @@
|
||||
# # to be replaced by SyaNodeParser
|
||||
# import ast
|
||||
#
|
||||
# from core.builtin_concepts import BuiltinConcepts
|
||||
# from core.tokenizer import TokenKind
|
||||
# from parsers.BaseNodeParser import SourceCodeNode
|
||||
# from parsers.BaseParser import BaseParser
|
||||
# from parsers.BnfNodeParser import BnfNodeParser, UnrecognizedTokensNode, ConceptNode
|
||||
# import core.utils
|
||||
# from parsers.PythonParser import PythonParser
|
||||
#
|
||||
# concept_lexer_parser = BnfNodeParser()
|
||||
#
|
||||
#
|
||||
# class MultipleConceptsParser(BaseParser):
|
||||
# """
|
||||
# Parser that will take the result of BnfNodeParser and
|
||||
# try to resolve the unrecognized tokens token by token
|
||||
#
|
||||
#     It is a success when it returns a list of ConceptNode exclusively
|
||||
# """
|
||||
#
|
||||
# def __init__(self, **kwargs):
|
||||
# BaseParser.__init__(self, "MultipleConcepts", 45)
|
||||
# self.enabled = False
|
||||
#
|
||||
# @staticmethod
|
||||
# def finalize(nodes_found, unrecognized_tokens):
|
||||
# if not unrecognized_tokens:
|
||||
# return nodes_found, unrecognized_tokens
|
||||
#
|
||||
# unrecognized_tokens.fix_source()
|
||||
# if unrecognized_tokens.not_whitespace():
|
||||
# nodes_found = core.utils.product(nodes_found, [unrecognized_tokens])
|
||||
#
|
||||
# return nodes_found, None
|
||||
#
|
||||
# @staticmethod
|
||||
# def create_or_add(unrecognized_tokens, token, index):
|
||||
# if unrecognized_tokens:
|
||||
# unrecognized_tokens.add_token(token, index)
|
||||
# else:
|
||||
# unrecognized_tokens = UnrecognizedTokensNode(index, index, [token])
|
||||
# return unrecognized_tokens
|
||||
#
|
||||
# def parse(self, context, parser_input):
|
||||
# sheerka = context.sheerka
|
||||
# nodes = self.get_input_as_lexer_nodes(parser_input, concept_lexer_parser)
|
||||
# if not nodes:
|
||||
# return None
|
||||
#
|
||||
# nodes_found = [[]]
|
||||
# concepts_only = True
|
||||
#
|
||||
# for node in nodes:
|
||||
# if isinstance(node, UnrecognizedTokensNode):
|
||||
# unrecognized_tokens = None
|
||||
# i = 0
|
||||
#
|
||||
# while i < len(node.tokens):
|
||||
#
|
||||
# token_index = node.start + i
|
||||
# token = node.tokens[i]
|
||||
#
|
||||
# concepts_nodes = self.get_concepts_nodes(context, token_index, token)
|
||||
# if concepts_nodes is not None:
|
||||
# nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
|
||||
# nodes_found = core.utils.product(nodes_found, concepts_nodes)
|
||||
# i += 1
|
||||
# continue
|
||||
#
|
||||
# source_code_node = self.get_source_code_node(context, token_index, node.tokens[i:])
|
||||
# if source_code_node:
|
||||
# nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
|
||||
# nodes_found = core.utils.product(nodes_found, [source_code_node])
|
||||
# i += len(source_code_node.tokens)
|
||||
# continue
|
||||
#
|
||||
# # not a concept nor some source code
|
||||
# unrecognized_tokens = self.create_or_add(unrecognized_tokens, token, token_index)
|
||||
# concepts_only &= token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE)
|
||||
# i += 1
|
||||
#
|
||||
# # finish processing if needed
|
||||
# nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
|
||||
#
|
||||
# else:
|
||||
# nodes_found = core.utils.product(nodes_found, [node])
|
||||
#
|
||||
# ret = []
|
||||
# for choice in nodes_found:
|
||||
# ret.append(
|
||||
# sheerka.ret(
|
||||
# self.name,
|
||||
# concepts_only,
|
||||
# sheerka.new(
|
||||
# BuiltinConcepts.PARSER_RESULT,
|
||||
# parser=self,
|
||||
# source=parser_input.source,
|
||||
# body=choice,
|
||||
# try_parsed=None))
|
||||
# )
|
||||
#
|
||||
# if len(ret) == 1:
|
||||
# self.log_result(context, parser_input.source, ret[0])
|
||||
# return ret[0]
|
||||
# else:
|
||||
# self.log_multiple_results(context, parser_input.source, ret)
|
||||
# return ret
|
||||
#
|
||||
# @staticmethod
|
||||
# def get_concepts_nodes(context, index, token):
|
||||
# """
|
||||
# Tries to recognize a concept
|
||||
#         from the universe of all known concepts
|
||||
# """
|
||||
#
|
||||
# if token.type != TokenKind.IDENTIFIER:
|
||||
# return None
|
||||
#
|
||||
# concept = context.new_concept(token.value)
|
||||
# if hasattr(concept, "__iter__") or context.sheerka.is_known(concept):
|
||||
# concepts = concept if hasattr(concept, "__iter__") else [concept]
|
||||
# concepts_nodes = [ConceptNode(c, index, index, [token], token.value) for c in concepts]
|
||||
# return concepts_nodes
|
||||
#
|
||||
# return None
|
||||
#
|
||||
# @staticmethod
|
||||
# def get_source_code_node(context, index, tokens):
|
||||
# """
|
||||
# Tries to recognize source code.
|
||||
# For the time being, only Python is supported
|
||||
# :param context:
|
||||
# :param tokens:
|
||||
# :param index:
|
||||
# :return:
|
||||
# """
|
||||
#
|
||||
# if len(tokens) == 0 or (len(tokens) == 1 and tokens[0].type == TokenKind.EOF):
|
||||
# return None
|
||||
#
|
||||
# end_index = len(tokens)
|
||||
# while end_index > 0:
|
||||
# parser = PythonParser()
|
||||
# tokens_to_parse = tokens[:end_index]
|
||||
# res = parser.parse(context, tokens_to_parse)
|
||||
# if res.status:
|
||||
#                 # only expressions are accepted
|
||||
# ast_ = res.value.value.ast_
|
||||
# if not isinstance(ast_, ast.Expression):
|
||||
# return None
|
||||
# try:
|
||||
# compiled = compile(ast_, "<string>", "eval")
|
||||
# eval(compiled, {}, {})
|
||||
# except Exception:
|
||||
# return None
|
||||
#
|
||||
# source = BaseParser.get_text_from_tokens(tokens_to_parse)
|
||||
# return SourceCodeNode(res.value.value, index, index + end_index - 1, tokens_to_parse, source)
|
||||
# end_index -= 1
|
||||
#
|
||||
# return None
|
||||
Reference in New Issue
Block a user