Fixed SyaNodeParser false-positive recognition issue

This commit is contained in:
2020-05-15 10:36:05 +02:00
parent 6e343ba996
commit 5489ef00b9
24 changed files with 484 additions and 5741 deletions
+1 -19
@@ -349,9 +349,6 @@ class EnumerationConcept(Concept):
self.set_value(ConceptParts.BODY, iteration)
self.metadata.is_evaluated = True
# def __iter__(self):
# return iter(self.body)
class ListConcept(Concept):
def __init__(self, items=None):
@@ -362,21 +359,6 @@ class ListConcept(Concept):
def append(self, obj):
self.body.append(obj)
# def __len__(self):
# return len(self.body)
#
# def __getitem__(self, key):
# return self.body[key]
#
# def __setitem__(self, key, value):
# self.body[key] = value
#
# def __iter__(self):
# return iter(self.body)
#
# def __contains__(self, item):
# return item in self.body
class FilteredConcept(Concept):
def __init__(self, filtered=None, iterable=None, predicate=None):
@@ -450,5 +432,5 @@ class ExplanationConcept(Concept):
self.set_value("command", command) # explain command parameters
self.set_value("title", title) # a title to the explanation
self.set_value("instructions", instructions) # instructions for SheerkaPrint
self.set_value(ConceptParts.BODY, execution_result) # list of results
self.metadata.is_evaluated = True
+1
@@ -326,6 +326,7 @@ def ensure_evaluated(context, concept):
return evaluated
def get_lexer_nodes_from_unrecognized(context, unrecognized_tokens_node, parsers):
"""
Using parsers, try to recognize concepts from source
+3 -3
@@ -221,7 +221,7 @@ class Concept:
Create the key for this concept.
Must be called only when the concept is fully initialized
The method is not called set_key to make sure that no other class sets the key by mistake
The method is not called 'set_key' to make sure that no other class sets the key by mistake
:param tokens:
:return:
"""
@@ -248,8 +248,8 @@ class Concept:
if token.value in variables:
key += VARIABLE_PREFIX + str(variables.index(token.value))
else:
value = token.value[1:-1] if token.type == TokenKind.STRING else token.value
key += value
#value = token.value[1:-1] if token.type == TokenKind.STRING else token.value
key += token.value
first = False
self.metadata.key = key
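For context, a minimal standalone sketch (hypothetical helper, not code from this commit) of what init_key now produces: variable tokens become __var__N placeholders and, as of this change, STRING tokens keep their quotes.

VARIABLE_PREFIX = "__var__"  # matches the TokenKind.VAR_DEF value introduced in the tokenizer below

def build_key(token_values, variables):
    # hypothetical condensation of the init_key loop shown above
    key = ""
    for value in token_values:
        if value in variables:
            key += VARIABLE_PREFIX + str(variables.index(value))
        else:
            key += value  # quotes around STRING tokens are now kept
    return key

assert build_key(["a", " ", "plus", " ", "b"], ["a", "b"]) == "__var__0 plus __var__1"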
@@ -56,12 +56,6 @@ class SheerkaCreateNewConcept:
return sheerka.ret(self.logger_name, False, ErrorConcept(init_ret_value.value))
resolved_concepts_by_first_keyword = init_ret_value.body
# update concept definition by key
# init_sya_ret_value = self.bnp.initialize(context, [concept], use_sheerka=True)
# if not init_sya_ret_value.status:
# return sheerka.ret(self.logger_name, False, ErrorConcept(init_sya_ret_value.value))
# concepts_by_first_keyword = init_sya_ret_value.body
concept.freeze_definition_hash()
cache_manager.add_concept(concept)
@@ -74,21 +68,3 @@ class SheerkaCreateNewConcept:
# process the return if needed
ret = sheerka.ret(self.logger_name, True, sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=concept))
return ret
# def load_concepts_nodes_definitions(self, context):
# """
# Gets from sdp what is needed to parse nodes
# :return:
# """
# sdp = self.sheerka.sdp
#
# concepts_by_first_keyword = sdp.get(
# self.sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY,
# load_origin=False) or {}
#
# init_ret_value = self.bnp.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
# if not init_ret_value.status:
# return self.sheerka.ret(self.logger_name, False, ErrorConcept(init_ret_value.value))
# resolved_concepts_by_first_keyword = init_ret_value.body
#
# return concepts_by_first_keyword, resolved_concepts_by_first_keyword
+4 -3
@@ -1,8 +1,9 @@
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
import core.utils
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
NO_MATCH = "** No Match **"
class SheerkaExecute:
"""
Manage the execution of a process flow
@@ -58,7 +59,8 @@ class SheerkaExecute:
# else "'" + BaseParser.get_text_from_tokens(to_parse) + "' as tokens"
# execution_context.log(f"Parsing {debug_text}")
with execution_context.push(desc=f"Parsing using {parser.name}", logger=parser.verbose_log) as sub_context:
with execution_context.push(desc=f"Parsing using {parser.name}",
logger=parser.verbose_log) as sub_context:
sub_context.add_inputs(to_parse=to_parse)
res = parser.parse(sub_context, to_parse)
if res is not None:
@@ -86,7 +88,6 @@ class SheerkaExecute:
stop_processing = True
sub_context.add_values(return_values=res)
if stop_processing:
break # Do not try the other priorities if a match is found
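The loop above walks the parsers by priority level; a hedged condensation (hypothetical function, not the project's API) of that control flow:

def dispatch(parsers_by_priority, to_parse):
    # try each priority level in order; stop at the first level that yields a match
    for priority in sorted(parsers_by_priority):
        results = [p(to_parse) for p in parsers_by_priority[priority]]
        results = [r for r in results if r is not None]
        if results:
            return results  # do not try the other priorities once a match is found
    return None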
@@ -35,7 +35,7 @@ class SheerkaModifyConcept:
# TODO : update concept by first keyword
# TODO : update resolved by first keyword
# TODO : update concets grammars
# TODO : update concepts grammars
ret = self.sheerka.ret(self.logger_name, True, self.sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=concept))
return ret
@@ -1,8 +1,6 @@
from dataclasses import dataclass
from typing import List
from sdp.sheerkaSerializer import Serializer
@dataclass
class Variable:
-48
@@ -60,10 +60,6 @@ class Sheerka(Concept):
self.bnp = None # reference to the BaseNodeParser class (to compute first keyword token)
# # Cache for concepts grammars
# # To be shared between BNFNode parsers instances
# self.concepts_grammars = {}
# a concept can be instantiated
# ex: File is a concept, but File('foo.txt') is an instance
# TODO: manage contexts
@@ -303,27 +299,6 @@ class Sheerka(Concept):
res = self.bnp.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
self.cache_manager.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, res.body)
# sya = self.bnf.resolve_sya_associativity_and_precedence()
# self.cache_manager.put(self.RESOLVED_CONCEPTS_SYA_DEFINITION_ENTRY, sya)
#
#
# self.concepts_by_first_keyword, \
# self.resolved_concepts_by_first_keyword = \
# self.create_new_concept_handler.load_concepts_nodes_definitions(context)
# self.concepts_by_first_keyword = self.sdp.get_safe(
# self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY,
# load_origin=False) or {}
#
# self.sya_definitions = self.sdp.get_safe(
# self.CONCEPTS_SYA_DEFINITION_ENTRY,
# load_origin=False) or {}
#
# init_ret_value = self.bnp.resolve_concepts_by_first_keyword(self, self.concepts_by_first_keyword)
# if not init_ret_value.status:
# return self.sheerka.ret(self.logger_name, False, ErrorConcept(init_ret_value.value))
# self.resolved_concepts_by_first_keyword = init_ret_value.body
def reset(self, cache_only=False):
self.cache_manager.clear()
self.cache_manager.cache_only = cache_only
@@ -346,7 +321,6 @@ class Sheerka(Concept):
with ExecutionContext(self.key, event, self, f"Evaluating '{text}'", self.log) as execution_context:
user_input = self.ret(self.name, True, self.new(BuiltinConcepts.USER_INPUT, body=text, user_name=user_name))
reduce_requested = self.ret(self.name, True, self.new(BuiltinConcepts.REDUCE_REQUESTED))
# execution_context.local_hints.add(BuiltinConcepts.EVAL_WHERE_REQUESTED)
steps = [
BuiltinConcepts.BEFORE_PARSING,
@@ -525,28 +499,6 @@ class Sheerka(Concept):
return concept
#
# def get(self, concept_key, concept_id=None):
# """
# Tries to find a concept
# What is returned must be used as a template for another concept.
# You must not modify the returned concept
# :param concept_key: key of the concept
# :param concept_id: when multiple concepts with the same key, use the id
# :return:
# """
#
# by_key = self.get_by_key(concept_key)
# if self.is_known(by_key):
# return by_key
#
# # else return by name
# by_name = self.get_by_name(concept_key)
# if self.is_known(by_name):
# return by_name
#
# return by_key # return not found for key
def get_by_key(self, concept_key, concept_id=None):
concept_key = str(concept_key) if isinstance(concept_key, BuiltinConcepts) else concept_key
return self.internal_get("key", concept_key, self.CONCEPTS_BY_KEY_ENTRY, concept_id)
+33 -3
@@ -1,4 +1,4 @@
from dataclasses import dataclass
from dataclasses import dataclass, field
from enum import Enum
@@ -48,6 +48,7 @@ class TokenKind(Enum):
DEGREE = "degree" # °
WORD = "word"
EQUALSEQUALS = "=="
VAR_DEF = "__var__"
@dataclass()
@@ -58,6 +59,8 @@ class Token:
line: int
column: int
_str_value: str = field(default=None, repr=False, compare=False, hash=None)
def __repr__(self):
if self.type == TokenKind.IDENTIFIER:
value = str(self.value)
@@ -72,6 +75,23 @@ class Token:
return f"Token({value})"
@property
def str_value(self):
if self._str_value:
return self._str_value
if self.type == TokenKind.STRING:
self._str_value = self.value[1:-1]
elif self.type == TokenKind.KEYWORD:
self._str_value = self.value.value
else:
self._str_value = str(self.value)
return self._str_value
@staticmethod
def is_whitespace(token):
return token and token.type == TokenKind.WHITESPACE
@dataclass()
class LexerError(Exception):
@@ -101,12 +121,13 @@ class Tokenizer:
KEYWORDS = set(x.value for x in Keywords)
def __init__(self, text, parse_word=False):
def __init__(self, text, yield_eof=True, parse_word=False):
self.text = text
self.text_len = len(text)
self.column = 1
self.line = 1
self.i = 0
self.yield_eof = yield_eof
self.parse_word = parse_word
def __iter__(self):
@@ -134,6 +155,7 @@ class Tokenizer:
self.i += 1
self.column += 1
elif c == "_":
from core.concept import VARIABLE_PREFIX
if self.i + 1 < self.text_len and self.text[self.i + 1].isalpha():
identifier = self.eat_identifier(self.i)
token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER
@@ -141,6 +163,13 @@ class Tokenizer:
yield Token(token_type, value, self.i, self.line, self.column)
self.i += len(identifier)
self.column += len(identifier)
elif self.i + 7 < self.text_len and \
self.text[self.i: self.i + 7] == VARIABLE_PREFIX and \
self.text[self.i + 7].isdigit():
number = self.eat_number(self.i + 7)
yield Token(TokenKind.VAR_DEF, VARIABLE_PREFIX + number, self.i, self.line, self.column)
self.i += 7 + len(number)
self.column += 7 + len(number)
else:
yield Token(TokenKind.UNDERSCORE, "_", self.i, self.line, self.column)
self.i += 1
@@ -308,7 +337,8 @@ class Tokenizer:
else:
raise LexerError(f"Unknown token '{c}'", self.text, self.i, self.line, self.column)
yield Token(TokenKind.EOF, "", self.i, self.line, self.column)
if self.yield_eof:
yield Token(TokenKind.EOF, "", self.i, self.line, self.column)
def eat_concept(self, start, line, column):
key, id, buffer = None, None, ""
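A hedged usage sketch of the two tokenizer additions (VAR_DEF tokens and the yield_eof flag), assuming the core.tokenizer module layout shown in the imports elsewhere in this commit; whether 'plus' lexes as IDENTIFIER or KEYWORD depends on Tokenizer.KEYWORDS:

from core.tokenizer import Tokenizer, TokenKind

# yield_eof=False suppresses the trailing EOF token (used by SyaConceptParserHelper below)
tokens = list(Tokenizer("__var__0 plus __var__1", yield_eof=False))
kinds = [t.type for t in tokens if t.type != TokenKind.WHITESPACE]
# expected: [TokenKind.VAR_DEF, <IDENTIFIER or KEYWORD>, TokenKind.VAR_DEF]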
+2 -2
@@ -91,7 +91,7 @@ class AtomConceptParserHelper:
self.debug.append(token)
if self.expected_tokens[0] != BaseNodeParser.get_token_value(token):
if self.expected_tokens[0] != token.str_value:
self.errors.append(UnexpectedTokenErrorNode(
f"Found '{token}' while expecting '{self.expected_tokens[0]}'",
token,
@@ -119,7 +119,7 @@ class AtomConceptParserHelper:
forked.eat_concept(concept, pos)
concept_node = ConceptNode(concept, pos, pos)
expected = [BaseNodeParser.get_token_value(t) for t in Tokenizer(concept.name)][1:-1]
expected = [t.str_value for t in Tokenizer(concept.name)][1:-1]
if not expected:
# the concept is already matched
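The token.str_value property replaces BaseNodeParser.get_token_value (removed below). A self-contained mini rendition (hypothetical MiniToken, not the project's Token) of the cached normalization; note the real property tests truthiness rather than is None, so an empty string would be recomputed on each access:

from dataclasses import dataclass, field

@dataclass
class MiniToken:
    type: str
    value: object
    _str_value: str = field(default=None, repr=False)

    @property
    def str_value(self):
        if self._str_value is None:
            # same normalization as the removed get_token_value
            self._str_value = self.value[1:-1] if self.type == "string" else str(self.value)
        return self._str_value

t = MiniToken("string", "'hello'")
assert t.str_value == "hello"   # computed once
assert t._str_value == "hello"  # then served from the cache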
+20 -12
@@ -53,9 +53,6 @@ class UnrecognizedTokensNode(LexerNode):
self.is_frozen = False
self.parenthesis_count = 0
def has_open_paren(self):
return self.parenthesis_count > 0
def add_token(self, token, pos):
if self.is_frozen:
raise Exception("The node is frozen")
@@ -78,6 +75,21 @@ class UnrecognizedTokensNode(LexerNode):
return self
def pop(self, token_kind):
if self.is_frozen:
raise Exception("The node is frozen")
if len(self.tokens) > 0 and self.tokens[-1].type == token_kind:
self.tokens.pop()
if len(self.tokens) == 0:
self.reset()
else:
self.end -= 1
def has_open_paren(self):
return self.parenthesis_count > 0
def not_whitespace(self):
return not self.is_whitespace()
@@ -90,6 +102,11 @@ class UnrecognizedTokensNode(LexerNode):
def is_empty(self):
return len(self.tokens) == 0
def last_token_type(self):
if len(self.tokens) == 0:
return None
return self.tokens[-1].type
def __eq__(self, other):
if isinstance(other, utnode):
return self.start == other.start and \
@@ -676,15 +693,6 @@ class BaseNodeParser(BaseParser):
return custom_concepts if custom else None
@staticmethod
def get_token_value(token):
if token.type == TokenKind.STRING:
return token.value[1:-1]
elif token.type == TokenKind.KEYWORD:
return token.value.value
else:
return token.value
@staticmethod
def get_concepts_by_first_keyword(context, concepts, use_sheerka=False):
"""
+193 -78
@@ -1,15 +1,16 @@
from collections import namedtuple
from dataclasses import dataclass, field
from operator import attrgetter
from typing import List
from core import builtin_helpers
from core.builtin_concepts import BuiltinConcepts
from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF
from core.concept import Concept, DEFINITION_TYPE_BNF
from core.sheerka.ExecutionContext import ExecutionContext
from core.tokenizer import Token, TokenKind
from core.tokenizer import Token, TokenKind, Tokenizer
from parsers.BaseNodeParser import UnrecognizedTokensNode, ConceptNode, SourceCodeNode, SyaAssociativity, \
SourceCodeWithConceptNode, BaseNodeParser
from parsers.BaseParser import ErrorNode, UnexpectedTokenErrorNode
from parsers.BaseParser import ErrorNode
PARSERS = ["BnfNode", "AtomNode", "Python"]
@@ -88,10 +89,13 @@ class SyaConceptParserHelper:
concept: Concept
start: int # position of the token in the tokenizer (Caution, it is not token.index)
end: int = field(default=-1, repr=False, compare=False, hash=None)
expected: List[str] = field(default_factory=list, repr=False, compare=False, hash=None)
expected: List[Token] = field(default_factory=list, repr=False, compare=False, hash=None)
expected_parameters_before_first_token: int = field(default=0, repr=False, compare=False, hash=None)
last_token_before_first_token: Token = field(default=None, repr=False, compare=False, hash=None)
potential_pos: int = field(default=-1, repr=False, compare=False, hash=None)
parameters_list_at_init: list = field(default_factory=list, repr=False, compare=False, hash=None)
tokens: List[Token] = field(default_factory=list, repr=False, compare=False, hash=None) # tokens eaten
remember_whitespace: Token = field(default=None, repr=False, compare=False, hash=None)
error: str = None
def __post_init__(self):
@@ -99,17 +103,20 @@ class SyaConceptParserHelper:
if self.end == -1:
self.end = self.start
first_keyword_found = False
for name in concept.key.split():
if not name.startswith(VARIABLE_PREFIX) and not first_keyword_found:
first_keyword_found = True
first_keyword_found = None
for token in Tokenizer(concept.key, yield_eof=False):
if not first_keyword_found and token.type != TokenKind.WHITESPACE and token.type != TokenKind.VAR_DEF:
first_keyword_found = token
if first_keyword_found:
self.expected.append(name)
self.expected.append(token)
else:
self.expected_parameters_before_first_token += 1
self.last_token_before_first_token = token
if token.type != TokenKind.WHITESPACE:
self.expected_parameters_before_first_token += 1
self.eat_token() # remove the fist token
self.eat_token(first_keyword_found) # remove the first token
self.tokens.append(first_keyword_found)
def is_matched(self):
return len(self.expected) == 0
@@ -117,23 +124,38 @@ class SyaConceptParserHelper:
def is_atom(self):
return len(self.concept.concept.metadata.variables) == 0 and len(self.expected) == 0
def is_expected(self, token):
if self.is_matched():
def is_next(self, token):
if self.is_matched() or len(self.expected) == 0:
return False
token_value = BaseNodeParser.get_token_value(token)
# True if the next token is the one that is expected
# Or if the next token is a whitespace and the expected one is the one after
# (whitespace is sometimes not mandatory)
return token.str_value == self.expected[0].str_value or \
self.expected[0].type == TokenKind.WHITESPACE and token.str_value == self.expected[1].str_value
def is_expected(self, token):
if self.is_matched() or token.type == TokenKind.WHITESPACE:
return False
for expected in self.expected:
if not expected.startswith(VARIABLE_PREFIX) and expected == token_value:
if expected.type != TokenKind.VAR_DEF and expected.str_value == token.str_value:
return True
return False
def expected_parameters(self):
return sum(map(lambda e: e.startswith(VARIABLE_PREFIX), self.expected))
return sum(map(lambda e: e.type == TokenKind.VAR_DEF, self.expected))
def eat_token(self):
# No check, as it is used only after is_expected
def eat_token(self, until_token):
"""
eat expected tokens until 'until_token' is reached
:param until_token:
:return:
"""
# No check, as it is used only after is_expected() or is_next()
while self.expected[0].str_value != until_token.str_value:
del self.expected[0]
del self.expected[0]
# return True if a whole sequence of keywords is eaten
@@ -143,7 +165,10 @@ class SyaConceptParserHelper:
if len(self.expected) == 0:
return True
return self.expected[0].startswith(VARIABLE_PREFIX)
# also return True at the end of a name sequence
# ... <var0> bar baz qux <var1>
# return True after 'qux', to indicate all the parameters from <var0> must be processed
return self.expected[0].type == TokenKind.VAR_DEF
def eat_parameter(self, parameter):
if self.is_matched() and parameter == self:
@@ -153,7 +178,7 @@ class SyaConceptParserHelper:
self.error = "No more parameter expected"
return
if not self.expected[0].startswith(VARIABLE_PREFIX):
if self.expected[0].type != TokenKind.VAR_DEF:
self.error = "Parameter was not expected"
return
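A toy illustration (plain strings instead of Token objects) of the is_next() rule above: the incoming token may match expected[0], or expected[1] when expected[0] is an optional whitespace:

WS = " "

def is_next(expected, token):
    if not expected:
        return False
    if token == expected[0]:
        return True
    return expected[0] == WS and len(expected) > 1 and token == expected[1]

assert is_next([WS, "then", WS], "then")  # the optional whitespace is skipped
assert not is_next(["then"], "else")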
@@ -202,6 +227,7 @@ class InFixToPostFix:
self.errors = [] # Not quite sure that I can handle more than one error
self.debug = []
self.false_positives = [] # concepts that look like known ones, but are not (for debug purposes)
self.forked = [] # used to fork InFixToPostFix when multiple parsers recognize the unrecognized_tokens
def __repr__(self):
@@ -245,7 +271,6 @@ class InFixToPostFix:
Note that when we are parsing unrecognized tokens,
we consider that the parentheses are part of the unrecognized tokens
:param token:
:param stack:
:return:
"""
return isinstance(token, Token) and token.type == TokenKind.RPAR
@@ -268,10 +293,10 @@ class InFixToPostFix:
:return:
"""
if isinstance(item, SyaConceptParserHelper) and len(item.expected) > 0 and not item.error:
if item.expected[0].startswith(VARIABLE_PREFIX):
if item.expected[0].type == TokenKind.VAR_DEF:
item.error = "Not enough suffix parameters"
else:
item.error = f"token '{item.expected[0]}' not found"
item.error = f"token '{item.expected[0].str_value}' not found"
if isinstance(item, SyaConceptParserHelper) and item.potential_pos != -1:
self.out.insert(item.potential_pos, item)
@@ -328,6 +353,16 @@ class InFixToPostFix:
).pseudo_fix_source()
return source_code
def _transform_to_unrecognized(self, parser_helper):
# reclaim an UnrecognizedTokensNode that was sent to out prematurely
if len(self.out) > 0 and isinstance(self.out[-1], UnrecognizedTokensNode):
self.unrecognized_tokens = self.out.pop()
if parser_helper.remember_whitespace:
self.unrecognized_tokens.add_token(parser_helper.remember_whitespace, parser_helper.start - 1)
for i, token in enumerate(parser_helper.tokens):
self.unrecognized_tokens.add_token(token, parser_helper.start + i)
def get_errors(self):
res = []
res.extend(self.errors)
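The heart of the fix: when a helper turns out to be a false positive, _transform_to_unrecognized re-files its eaten tokens instead of leaving a half-matched concept in the output queue. A toy version (hypothetical lists, no project types):

def match_concept(expected_keywords, tokens):
    # toy stand-in for SyaConceptParserHelper's keyword matching
    eaten = []
    for kw in expected_keywords:
        if tokens and tokens[0] == kw:
            eaten.append(tokens.pop(0))
        else:
            return None, eaten  # false positive: caller re-files 'eaten' as unrecognized
    return eaten, []

matched, give_back = match_concept(["for", "each"], ["for", "a", "while"])
assert matched is None and give_back == ["for"]  # 'for' goes back to unrecognized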
@@ -343,28 +378,28 @@ class InFixToPostFix:
self.is_locked = False
def manage_parameters_when_new_concept(self, temp_concept_node):
def manage_parameters_when_new_concept(self, parser_helper):
"""
When a new concept is created, we need to check what to do with the parameters
that were queued
:param temp_concept_node: new concept
:param parser_helper: new concept
:return:
"""
if len(self.parameters_list) < temp_concept_node.expected_parameters_before_first_token:
if len(self.parameters_list) < parser_helper.expected_parameters_before_first_token:
# The new concept expects some prefix parameters, but there are not enough
temp_concept_node.error = "Not enough prefix parameters"
parser_helper.error = "Not enough prefix parameters"
return
if len(self.parameters_list) > temp_concept_node.expected_parameters_before_first_token:
if len(self.parameters_list) > parser_helper.expected_parameters_before_first_token:
# There are more parameters than needed by the new concept
# The others are either
# - parameters for the previous concept (if any)
# - concepts on their own
# - syntax error
# In all the cases, the only thing that matters is to pop what is expected by the new concept
for i in range(temp_concept_node.expected_parameters_before_first_token):
for i in range(parser_helper.expected_parameters_before_first_token):
self.parameters_list.pop()
temp_concept_node.parameters_list_at_init.extend(self.parameters_list)
parser_helper.parameters_list_at_init.extend(self.parameters_list)
return
# len(self.parameters_list) == temp_concept_node.expected_parameters_before_first_token
@@ -385,14 +420,18 @@ class InFixToPostFix:
:return:
"""
# manage parenthesis that didn't find any match
if self._is_lpar(self.stack[-1]):
self._add_error(ParenthesisMismatchErrorNode(self.stack[-1]))
# The parameter must be part of the current concept being parsed
assert len(self._concepts()) != 0 # sanity check
current_concept = self._concepts()[-1]
while len(current_concept.expected) > 0 and current_concept.expected[0].startswith(VARIABLE_PREFIX):
while len(current_concept.expected) > 0 and current_concept.expected[0].type == TokenKind.VAR_DEF:
# eat everything that was expected
if len(self.parameters_list) == 0:
# current_concept.error = f"Failed to match parameter '{current_concept.expected[0]}'"
current_concept.error = f"Failed to match parameter '{current_concept.expected[0].str_value}'"
return
del self.parameters_list[0]
del current_concept.expected[0]
@@ -506,6 +545,11 @@ class InFixToPostFix:
if stack.associativity == SyaAssociativity.No and current.associativity == SyaAssociativity.No:
self._add_error(NoneAssociativeSequenceErrorNode(current.concept, stack_head.start, concept_node.start))
if not current.precedence:
# precedence is not set (None or zero)
# Do not apply any rule
return False
if current.associativity == SyaAssociativity.Left and current.precedence <= stack.precedence:
return True
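A hedged sketch of the pop decision above (left-associative case only; the right-associative branch is elided in this hunk):

def i_can_pop(current_prec, current_assoc, stack_prec):
    if not current_prec:
        # precedence is not set (None or zero): do not apply any rule
        return False
    return current_assoc == "left" and current_prec <= stack_prec

assert i_can_pop(1, "left", 2)         # '+' arriving while '*' sits on the stack
assert not i_can_pop(None, "left", 2)  # unranked concepts never trigger a pop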
@@ -528,9 +572,55 @@ class InFixToPostFix:
:return:
"""
def _pop_stack(c):
while self.stack[-1] != c and not self._is_lpar(c):
self.pop_stack_to_out()
if self._is_lpar(self.stack[-1]):
self._add_error(ParenthesisMismatchErrorNode(self.stack[-1]))
return False
# Manage concepts ending with long names
if self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1].is_matched():
self.pop_stack_to_out()
for current_concept in reversed(self._concepts()):
# As I may lose memory again ;-)
# it's a reversed loop to manage cases like
# if a plus b then ...
# The current concept is 'plus', but the token is 'then'
# It means that I have finished parsing the 'plus' and started the second part of the 'if'
if current_concept.is_next(token):
current_concept.end = pos
current_concept.tokens.append(token)
if current_concept.eat_token(token):
_pop_stack(current_concept)
return True
if len(current_concept.expected) > 0 and current_concept.expected[0].type != TokenKind.VAR_DEF:
if current_concept.expected[0].type == TokenKind.WHITESPACE:
# drop it. It's the case where an optional whitespace is missing
del (current_concept.expected[0])
else:
# error
# We are not parsing the concept we thought we were parsing.
# Transform the eaten tokens into unrecognized
# and discard the current SyaConceptParserHelper
# TODO: manage the pending LPAR, RPAR ?
self._transform_to_unrecognized(current_concept)
self.false_positives.append(current_concept)
self.stack.pop()
return False
if current_concept.is_expected(token):
# Fix the whitespace between var and expected if needed
# current_concept.expected[0] is '<var>'
# current_concept.expected[1] is what separate var from expected (normally a whitespace)
if current_concept.expected[1].type == TokenKind.WHITESPACE:
self.unrecognized_tokens.pop(TokenKind.WHITESPACE)
current_concept.end = pos
self.manage_unrecognized()
# manage that some clones may have been forked
@@ -550,36 +640,33 @@ class InFixToPostFix:
self.parameters_list[:]))
return True # no need to continue
while self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1].is_matched():
self.pop_stack_to_out()
while self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1] != current_concept:
current = self.stack[-1]
if current.error:
self._transform_to_unrecognized(current)
self.false_positives.append(current)
self.stack.pop()
if current_concept.expected[1].type == TokenKind.WHITESPACE:
self.unrecognized_tokens.pop(TokenKind.WHITESPACE)
self.manage_unrecognized()
# manage that some clones may have been forked
for forked in self.forked:
forked.handle_expected_token(token, pos)
else:
self.pop_stack_to_out()
self.manage_parameters()
if current_concept.eat_token():
while self.stack[-1] != current_concept and not self._is_lpar(current_concept):
self.pop_stack_to_out()
# maybe eat whitespace that was between <var> and expected token
if current_concept.expected[0].type == TokenKind.WHITESPACE:
del current_concept.expected[0]
if self._is_lpar(self.stack[-1]):
self._add_error(ParenthesisMismatchErrorNode(self.stack[-1]))
return False
# Manage concepts ending with long names
if self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1].is_matched():
self.pop_stack_to_out()
if current_concept.eat_token(token):
_pop_stack(current_concept)
return True
# else:
# if token.type != TokenKind.WHITESPACE:
# # hack, because whitespaces are not correctly parsed in self.expected
# # KSI 2020/04/25
# # I no longer understand why we are in a loop (the reverse one)
# # if we are parsing a concept and the expected token does not match
# # The whole class should be in error
# self._add_error(UnexpectedTokenErrorNode(
# f"Failed to parse '{current_concept.concept.concept}'",
# token, current_concept.expected))
# return False
return False
def eat_token(self, token, pos):
@@ -692,10 +779,11 @@ class InFixToPostFix:
return False
def eat_concept(self, sya_concept_def, pos):
def eat_concept(self, sya_concept_def, token, pos):
"""
a concept is found
:param sya_concept_def:
:param token:
:param pos:
:return:
"""
@@ -704,37 +792,43 @@ class InFixToPostFix:
return
self.debug.append(sya_concept_def)
temp_concept_node = SyaConceptParserHelper(sya_concept_def, pos)
parser_helper = SyaConceptParserHelper(sya_concept_def, pos)
if self.unrecognized_tokens.last_token_type() == TokenKind.WHITESPACE:
parser_helper.remember_whitespace = self.unrecognized_tokens.tokens[-1]
if Token.is_whitespace(parser_helper.last_token_before_first_token):
self.unrecognized_tokens.pop(TokenKind.WHITESPACE)
# First, try to recognize the tokens that are waiting
self.manage_unrecognized()
for forked in self.forked:
# manage the fact that some clone may have been forked
forked.eat_concept(sya_concept_def, pos)
forked.eat_concept(sya_concept_def, token, pos)
# then, check if this new concept is linked to the previous ones
# ie, is the previous concept fully matched ?
if temp_concept_node.expected_parameters_before_first_token == 0:
if parser_helper.expected_parameters_before_first_token == 0:
# => does not expect pending parameters (it's a suffixed concept)
while self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1].potential_pos != -1:
# => previous seems to have everything it needs in the parameter list
self.pop_stack_to_out()
if temp_concept_node.is_atom():
self._put_to_out(temp_concept_node.fix_concept())
if parser_helper.is_atom():
self._put_to_out(parser_helper.fix_concept())
else:
# call shunting yard algorithm
while self.i_can_pop(temp_concept_node):
while self.i_can_pop(parser_helper):
self.pop_stack_to_out()
if temp_concept_node.is_matched():
if parser_helper.is_matched():
# case of a prefix concept which has found happiness with self.parameters_list
# directly put it in out
self.manage_parameters_when_new_concept(temp_concept_node)
self._put_to_out(temp_concept_node.fix_concept())
self.manage_parameters_when_new_concept(parser_helper)
self._put_to_out(parser_helper.fix_concept())
else:
self.stack.append(temp_concept_node)
self.manage_parameters_when_new_concept(temp_concept_node)
self.stack.append(parser_helper)
self.manage_parameters_when_new_concept(parser_helper)
def eat_unrecognized(self, token, pos):
"""
@@ -762,18 +856,34 @@ class InFixToPostFix:
if len(self.stack) == 0 and len(self.out) == 0:
return # no need to pop the buffer, as no concept is found
while len(self.stack) > 0:
parser_helper = self.stack[-1]
# validate parenthesis
if self._is_lpar(parser_helper) or self._is_rpar(parser_helper):
self._add_error(ParenthesisMismatchErrorNode(parser_helper))
return None
self.manage_unrecognized()
for forked in self.forked:
# manage that some clones may have been forked
forked.finalize()
failed_to_match = sum(map(lambda e: e.type != TokenKind.VAR_DEF, parser_helper.expected))
if failed_to_match > 0:
# didn't manage to read all tokens.
# Transform them into unrecognized
self._transform_to_unrecognized(parser_helper)
self.false_positives.append(parser_helper)
self.stack.pop() # discard the parser helper
else:
self.pop_stack_to_out() # process it
self.manage_unrecognized()
for forked in self.forked:
# manage that some clones may have been forked
forked.finalize()
while len(self.stack) > 0:
if self._is_lpar(self.stack[-1]) or self._is_rpar(self.stack[-1]):
self._add_error(ParenthesisMismatchErrorNode(self.stack[-1]))
return None
self.pop_stack_to_out()
def clone(self):
clone = InFixToPostFix(self.context)
clone.is_locked = self.is_locked
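A toy check (plain strings) of the failed_to_match test that finalize() now applies before popping a helper: expected entries that are not __var__ placeholders were real keywords that never arrived:

expected = ["__var__1", "then"]  # 'then' was never seen in the input
failed_to_match = sum(not e.startswith("__var__") for e in expected)
assert failed_to_match == 1      # so this helper is demoted to unrecognized tokens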
@@ -975,7 +1085,7 @@ class SyaNodeParser(BaseNodeParser):
try:
if token.type in (TokenKind.LPAR, TokenKind.RPAR):
# little optim, no need to get the concept when parenthesis
# little optim, no need to lock, unlock or get the concept when parenthesis
for infix_to_postfix in res:
infix_to_postfix.eat_token(token, self.pos)
continue
@@ -992,7 +1102,7 @@ class SyaNodeParser(BaseNodeParser):
if len(concepts) == 1:
for infix_to_postfix in res:
infix_to_postfix.eat_concept(concepts[0], self.pos)
infix_to_postfix.eat_concept(concepts[0], token, self.pos)
continue
# make the cartesian product
@@ -1001,7 +1111,7 @@ class SyaNodeParser(BaseNodeParser):
for concept in concepts:
clone = infix_to_postfix.clone()
temp_res.append(clone)
clone.eat_concept(concept, self.pos)
clone.eat_concept(concept, token, self.pos)
res = temp_res
finally:
@@ -1100,6 +1210,11 @@ class SyaNodeParser(BaseNodeParser):
to_insert = item
sequence.insert(0, to_insert)
if has_unrecognized:
# Manage some sick cases where a missing parenthesis messes the order of the sequence
# example "foo bar(one plus two"
sequence.sort(key=attrgetter("start"))
ret.append(
self.sheerka.ret(
self.name,
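Putting the SyaNodeParser changes together, a toy walk-through (hypothetical dicts, no project imports) of the reversed loop in handle_expected_token() for "if a plus b then ...": the arriving 'then' belongs to the outer 'if' helper, which proves the inner 'plus' concept is complete and can be popped to out:

stack = [
    {"key": "if __var__0 then __var__1", "expected": ["then"]},
    {"key": "__var__0 plus __var__1", "expected": []},  # fully matched inner concept
]
token = "then"
for helper in reversed(stack):
    if helper["expected"] and helper["expected"][0] == token:
        while stack[-1] is not helper:   # pop the finished 'plus' to the output queue
            stack.pop()
        helper["expected"].pop(0)        # then eat 'then'
        break
assert stack == [{"key": "if __var__0 then __var__1", "expected": []}]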
-912
@@ -1,912 +0,0 @@
# #####################################################################################################
# # This implementation of the parser is highly inspired by the arpeggio project (https://github.com/textX/Arpeggio)
# # I don't directly use the project, but it helped me figure out
# # what to do.
# # Dejanović I., Milosavljević G., Vaderna R.:
# # Arpeggio: A flexible PEG parser for Python,
# # Knowledge-Based Systems, 2016, 95, 71 - 74, doi:10.1016/j.knosys.2015.12.004
# #####################################################################################################
# from collections import namedtuple
# from dataclasses import dataclass
# from collections import defaultdict
# from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
# from core.concept import Concept, ConceptParts, DoNotResolve
# from core.tokenizer import TokenKind, Tokenizer, Token
# from parsers.BaseNodeParser import LexerNode, GrammarErrorNode, ConceptNode, UnrecognizedTokensNode
# from parsers.BaseParser import BaseParser, ErrorNode
# import core.utils
#
#
# class NonTerminalNode(LexerNode):
# """
# Returned by the BnfNodeParser
# """
#
# def __init__(self, parsing_expression, start, end, tokens, children=None):
# super().__init__(start, end, tokens)
# self.parsing_expression = parsing_expression
# self.children = children
#
# def __repr__(self):
# name = self.parsing_expression.rule_name or self.parsing_expression.__class__.__name__
# if len(self.children) > 0:
# sub_names = "(" + ",".join([repr(child) for child in self.children]) + ")"
# else:
# sub_names = ""
# return name + sub_names
#
# def __eq__(self, other):
# if not isinstance(other, NonTerminalNode):
# return False
#
# return self.parsing_expression == other.parsing_expression and \
# self.start == other.start and \
# self.end == other.end and \
# self.children == other.children
#
# def __hash__(self):
# return hash((self.parsing_expression, self.start, self.end, self.children))
#
#
# class TerminalNode(LexerNode):
# """
# Returned by the BnfNodeParser
# """
#
# def __init__(self, parsing_expression, start, end, value):
# super().__init__(start, end, source=value)
# self.parsing_expression = parsing_expression
# self.value = value
#
# def __repr__(self):
# name = self.parsing_expression.rule_name or ""
# return name + f"'{self.value}'"
#
# def __eq__(self, other):
# if not isinstance(other, TerminalNode):
# return False
#
# return self.parsing_expression == other.parsing_expression and \
# self.start == other.start and \
# self.end == other.end and \
# self.value == other.value
#
# def __hash__(self):
# return hash((self.parsing_expression, self.start, self.end, self.value))
#
#
# @dataclass()
# class UnknownConceptNode(ErrorNode):
# concept_key: str
#
#
# @dataclass()
# class TooManyConceptNode(ErrorNode):
# concept_key: str
#
#
# class ParsingExpression:
# def __init__(self, *args, **kwargs):
# self.elements = args
#
# nodes = kwargs.get('nodes', [])
# if not hasattr(nodes, '__iter__'):
# nodes = [nodes]
# self.nodes = nodes
#
# self.rule_name = kwargs.get('rule_name', '')
#
# def __eq__(self, other):
# if not isinstance(other, ParsingExpression):
# return False
#
# return self.rule_name == other.rule_name and self.elements == other.elements
#
# def __hash__(self):
# return hash((self.rule_name, self.elements))
#
# def parse(self, parser):
# return self._parse(parser)
#
# def add_rule_name_if_needed(self, text):
# return text + "=" + self.rule_name if self.rule_name else text
#
#
# class ConceptExpression(ParsingExpression):
# """
# Will match a concept
# It is used only for rule definition
#
# When the grammar is created, it is replaced by the actual concept
# """
#
# def __init__(self, concept, rule_name=""):
# super().__init__(rule_name=rule_name)
# self.concept = concept
#
# def __repr__(self):
# return self.add_rule_name_if_needed(f"{self.concept}")
#
# def __eq__(self, other):
# if not super().__eq__(other):
# return False
#
# if not isinstance(other, ConceptExpression):
# return False
#
# if isinstance(self.concept, Concept):
# return self.concept.name == other.concept.name
#
# # when it's only the name of the concept
# return self.concept == other.concept
#
# def __hash__(self):
# return hash((self.concept, self.rule_name))
#
# @staticmethod
# def get_parsing_expression_from_name(name):
# tokens = Tokenizer(name)
# nodes = [StrMatch(core.utils.strip_quotes(token.value)) for token in list(tokens)[:-1]]
# if len(nodes) == 1:
# return nodes[0]
# else:
# sequence = Sequence(nodes)
# sequence.nodes = nodes
# return sequence
#
# def _parse(self, parser):
# to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept
# if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
# return None
#
# self.concept = to_match # Memoize
#
# if to_match not in parser.concepts_grammars:
# # Try to match the concept using its name
# expr = self.get_parsing_expression_from_name(to_match.name)
# node = expr.parse(parser)
# else:
# node = parser.concepts_grammars[to_match].parse(parser)
#
# if node is None:
# return None
#
# return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node])
#
#
# class ConceptGroupExpression(ConceptExpression):
# def _parse(self, parser):
# to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept
# if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
# return None
#
# self.concept = to_match # Memoize
#
# if to_match not in parser.concepts_grammars:
# concepts_in_group = parser.sheerka.get_set_elements(parser.context, self.concept)
# nodes = [ConceptExpression(c, rule_name=c.name) for c in concepts_in_group]
# expr = OrderedChoice(nodes)
# expr.nodes = nodes
# node = expr.parse(parser)
# else:
# node = parser.concepts_grammars[to_match].parse(parser)
#
# if node is None:
# return None
#
# return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node])
#
#
# class Sequence(ParsingExpression):
# """
# Will match sequence of parser expressions in exact order they are defined.
# """
#
# def _parse(self, parser):
# init_pos = parser.pos
# end_pos = parser.pos
#
# children = []
# for e in self.nodes:
# node = e.parse(parser)
# if node is None:
# return None
# else:
# if node.end != -1: # because returns -1 when no match
# children.append(node)
# end_pos = node.end
#
# return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)
#
# def __repr__(self):
# to_str = ", ".join(repr(n) for n in self.elements)
# return self.add_rule_name_if_needed(f"({to_str})")
#
#
# class OrderedChoice(ParsingExpression):
# """
# Will match one among multiple
# It will stop at the first match (so the order of definition is important)
# """
#
# def _parse(self, parser):
# init_pos = parser.pos
#
# for e in self.nodes:
# node = e.parse(parser)
# if node:
# return NonTerminalNode(self, init_pos, node.end, parser.tokens[init_pos: node.end + 1], [node])
#
# parser.seek(init_pos) # backtrack
#
# return None
#
# def __repr__(self):
# to_str = "| ".join(repr(n) for n in self.elements)
# return self.add_rule_name_if_needed(f"({to_str})")
#
#
# class Optional(ParsingExpression):
# """
# Will match the elements or not
# if there are many matches, it will choose the longest one
# If you need order, use Optional(OrderedChoice)
# """
#
# def _parse(self, parser):
# init_pos = parser.pos
# selected_node = NonTerminalNode(self, parser.pos, -1, [], []) # means that nothing is found
#
# for e in self.nodes:
# node = e.parse(parser)
# if node:
# if node.end > selected_node.end:
# selected_node = NonTerminalNode(
# self,
# node.start,
# node.end,
# parser.tokens[node.start: node.end + 1],
# [node])
#
# parser.seek(init_pos) # backtrack
#
# if selected_node.end != -1:
# parser.seek(selected_node.end)
# parser.next_token() # eat the tokens found
#
# return selected_node
#
# def __repr__(self):
# if len(self.elements) == 1:
# return f"{self.elements[0]}?"
# else:
# to_str = ", ".join(repr(n) for n in self.elements)
# return self.add_rule_name_if_needed(f"({to_str})?")
#
#
# class Repetition(ParsingExpression):
# """
# Base class for all repetition-like parser expressions (?,*,+)
# Args:
# eolterm(bool): Flag that indicates that end of line should
# terminate repetition match.
# """
#
# def __init__(self, *elements, **kwargs):
# super(Repetition, self).__init__(*elements, **kwargs)
# self.sep = kwargs.get('sep', None)
#
#
# class ZeroOrMore(Repetition):
# """
# ZeroOrMore will try to match parser expression specified zero or more
# times. It will never fail.
# """
#
# def _parse(self, parser):
# init_pos = parser.pos
# end_pos = -1
# children = []
#
# while True:
# current_pos = parser.pos
#
# # maybe eat the separator if needed
# if self.sep and children:
# sep_result = self.sep.parse(parser)
# if sep_result is None:
# parser.seek(current_pos)
# break
#
# # eat the ZeroOrMore
# node = self.nodes[0].parse(parser)
# if node is None:
# parser.seek(current_pos)
# break
# else:
# if node.end != -1: # because returns -1 when no match
# children.append(node)
# end_pos = node.end
#
# if len(children) == 0:
# return NonTerminalNode(self, init_pos, -1, [], [])
#
# return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)
#
# def __repr__(self):
# to_str = ", ".join(repr(n) for n in self.elements)
# return self.add_rule_name_if_needed(f"({to_str})*")
#
#
# class OneOrMore(Repetition):
# """
# OneOrMore will try to match parser expression specified one or more times.
# """
#
# def _parse(self, parser):
# init_pos = parser.pos
# end_pos = -1
# children = []
#
# while True:
# current_pos = parser.pos
#
# # maybe eat the separator if needed
# if self.sep and children:
# sep_result = self.sep.parse(parser)
# if sep_result is None:
# parser.seek(current_pos)
# break
#
# eat the OneOrMore
# node = self.nodes[0].parse(parser)
# if node is None:
# parser.seek(current_pos)
# break
# else:
# if node.end != -1: # because returns -1 when no match
# children.append(node)
# end_pos = node.end
#
# if len(children) == 0: # if nothing is found, it's an error
# return None
#
# return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)
#
# def __repr__(self):
# to_str = ", ".join(repr(n) for n in self.elements)
# return self.add_rule_name_if_needed(f"({to_str})+")
#
#
# class UnorderedGroup(Repetition):
# """
# Will try to match all of the parsing expression in any order.
# """
#
# def _parse(self, parser):
# raise NotImplementedError()
#
# # def __repr__(self):
# # to_str = ", ".join(repr(n) for n in self.elements)
# # return f"({to_str})#"
#
#
# class Match(ParsingExpression):
# """
# Base class for all classes that will try to match something from the input.
# """
#
# def __init__(self, rule_name, root=False):
# super(Match, self).__init__(rule_name=rule_name, root=root)
#
# def parse(self, parser):
# result = self._parse(parser)
# return result
#
#
# class StrMatch(Match):
# """
# Matches a literal
# """
#
# def __init__(self, to_match, rule_name="", ignore_case=True):
# super(Match, self).__init__(rule_name=rule_name)
# self.to_match = to_match
# self.ignore_case = ignore_case
#
# def __repr__(self):
# return self.add_rule_name_if_needed(f"'{self.to_match}'")
#
# def __eq__(self, other):
# if not super().__eq__(other):
# return False
#
# if not isinstance(other, StrMatch):
# return False
#
# return self.to_match == other.to_match and self.ignore_case == other.ignore_case
#
# def _parse(self, parser):
# token = parser.get_token()
# m = str(token.value).lower() == self.to_match.lower() if self.ignore_case \
# else token.value == self.to_match
#
# if m:
# node = TerminalNode(self, parser.pos, parser.pos, token.value)
# parser.next_token()
# return node
#
# return None
#
#
# class BnfNodeParser(BaseParser):
# def __init__(self, **kwargs):
# super().__init__("BnfNode_old", 50)
# self.enabled = False
# if 'grammars' in kwargs:
# self.concepts_grammars = kwargs.get("grammars")
# elif 'sheerka' in kwargs:
# self.concepts_grammars = kwargs.get("sheerka").concepts_grammars
# else:
# self.concepts_grammars = {}
#
# self.ignore_case = True
#
# self.token = None
# self.pos = -1
# self.tokens = None
#
# self.context = None
# self.text = None
# self.sheerka = None
#
# def add_error(self, error, next_token=True):
# self.error_sink.append(error)
# if next_token:
# self.next_token()
# return error
#
# def reset_parser(self, context, text):
# self.context = context
# self.sheerka = context.sheerka
# self.text = text
#
# try:
# self.tokens = list(self.get_input_as_tokens(text))
# except core.tokenizer.LexerError as e:
# self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
# return False
#
# self.token = None
# self.pos = -1
# self.next_token(False)
# return True
#
# def get_token(self) -> Token:
# return self.token
#
# def next_token(self, skip_whitespace=True):
# if self.token and self.token.type == TokenKind.EOF:
# return False
#
# self.pos += 1
# self.token = self.tokens[self.pos]
#
# if skip_whitespace:
# while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
# self.pos += 1
# self.token = self.tokens[self.pos]
#
# return self.token.type != TokenKind.EOF
#
# def seek(self, pos):
# self.pos = pos
# self.token = self.tokens[self.pos]
# return True
#
# def rewind(self, offset, skip_whitespace=True):
# self.pos += offset
# self.token = self.tokens[self.pos]
#
# if skip_whitespace:
# while self.pos > 0 and (self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE):
# self.pos -= 1
# self.token = self.tokens[self.pos]
#
# def initialize(self, context, concepts_definitions):
# """
# Adds a bunch of concepts, and how they can be recognized
# :param context: execution context
# :param concepts_definitions: dictionary of concept, concept_definition
# :return:
# """
#
# self.context = context
# self.sheerka = context.sheerka
# concepts_to_resolve = set()
#
# for concept, concept_def in concepts_definitions.items():
# # ## Gets the grammars
# context.log(f"Resolving grammar for '{concept}'", context.who)
# concept.init_key() # make sure that the key is initialized
# grammar = self.get_model(concept_def, concepts_to_resolve)
# self.concepts_grammars[concept] = grammar
#
# if self.has_error:
# return self.sheerka.ret(self.name, False, self.error_sink)
#
# # ## Removes concepts with infinite recursions
# concepts_to_remove = self.detect_infinite_recursion(concepts_to_resolve)
# for concept in concepts_to_remove:
# concepts_to_resolve.remove(concept)
# del self.concepts_grammars[concept]
#
# if self.has_error:
# return self.sheerka.ret(self.name, False, self.error_sink)
# else:
# return self.sheerka.ret(self.name, True, self.concepts_grammars)
#
# def get_concept(self, concept_name):
# if concept_name in self.context.concepts:
# return self.context.concepts[concept_name]
# return self.sheerka.get_by_key(concept_name)
#
# def get_model(self, concept_def, concepts_to_resolve):
#
# # TODO
# # inner_get_model must not modify the initial ParsingExpression
# # A copy must be created
# def inner_get_model(expression):
# if isinstance(expression, Concept):
# if self.sheerka.isaset(self.context, expression):
# ret = ConceptGroupExpression(expression, rule_name=expression.name)
# else:
# ret = ConceptExpression(expression, rule_name=expression.name)
# concepts_to_resolve.add(expression)
# elif isinstance(expression, ConceptExpression): # it includes ConceptGroupExpression
# if expression.rule_name is None or expression.rule_name == "":
# expression.rule_name = expression.concept.name if isinstance(expression.concept, Concept) \
# else expression.concept
# if isinstance(expression.concept, str):
# concept = self.get_concept(expression.concept)
# if self.sheerka.is_known(concept):
# expression.concept = concept
# concepts_to_resolve.add(expression.concept)
# ret = expression
# elif isinstance(expression, str):
# ret = StrMatch(expression, ignore_case=self.ignore_case)
# elif isinstance(expression, StrMatch):
# ret = expression
# if ret.ignore_case is None:
# ret.ignore_case = self.ignore_case
# elif isinstance(expression, Sequence) or \
# isinstance(expression, OrderedChoice) or \
# isinstance(expression, ZeroOrMore) or \
# isinstance(expression, OneOrMore) or \
# isinstance(expression, Optional):
# ret = expression
# ret.nodes = [inner_get_model(e) for e in ret.elements]
# else:
# ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False)
#
# # Translate separator expression.
# if isinstance(expression, Repetition) and expression.sep:
# expression.sep = inner_get_model(expression.sep)
#
# return ret
#
# model = inner_get_model(concept_def)
#
# return model
#
# def detect_infinite_recursion(self, concepts_to_resolve):
#
# # infinite recursion matcher
# def _is_infinite_recursion(ref_concept, node):
# if isinstance(node, ConceptExpression):
# if node.concept == ref_concept:
# return True
#
# if isinstance(node.concept, str):
# to_match = self.get_concept(node.concept)
# if self.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
# return False
# else:
# to_match = node.concept
#
# if to_match not in self.concepts_grammars:
# return False
#
# return _is_infinite_recursion(ref_concept, self.concepts_grammars[to_match])
#
# if isinstance(node, OrderedChoice):
# return _is_infinite_recursion(ref_concept, node.nodes[0])
#
# if isinstance(node, Sequence):
# for node in node.nodes:
# if _is_infinite_recursion(ref_concept, node):
# return True
# return False
#
# return False
#
# removed_concepts = []
# for e in concepts_to_resolve:
# if isinstance(e, str):
# e = self.get_concept(e)
# if self.sheerka.isinstance(e, BuiltinConcepts.UNKNOWN_CONCEPT):
# continue
#
# if e not in self.concepts_grammars:
# continue
#
# to_resolve = self.concepts_grammars[e]
# if _is_infinite_recursion(e, to_resolve):
# removed_concepts.append(e)
# return removed_concepts
#
# def parse(self, context, parser_input):
# if parser_input == "":
# return context.sheerka.ret(
# self.name,
# False,
# context.sheerka.new(BuiltinConcepts.IS_EMPTY)
# )
#
# if not self.reset_parser(context, parser_input):
# return self.sheerka.ret(
# self.name,
# False,
# context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
#
# concepts_found = [[]]
# unrecognized_tokens = None
# has_unrecognized = False
#
# # actually list of list
# # The first dimension is the number of possibilities found
# # The second dimension is the number of concepts found, under one possibility
# #
# # Example 1
# # concept foo : 'one' 'two'
# # concept bar : 'one' 'two'
# # input 'one two' -> will produce two possibilities (foo and bar).
# #
# # Example 2
# # concept foo : 'one'
# # concept bar : 'two'
# # input 'one two' -> will produce one possibility which is (foo, bar) (foo then bar)
#
# while True:
# init_pos = self.pos
# res = []
#
# for concept, grammar in self.concepts_grammars.items():
# self.seek(init_pos)
# node = grammar.parse(self) # a node is TerminalNode or NonTerminalNode
# if node is not None and node.end != -1:
# updated_concept = self.finalize_concept(context.sheerka, concept, node)
# concept_node = ConceptNode(
# updated_concept,
# node.start,
# node.end,
# self.tokens[node.start: node.end + 1],
# None,
# node)
# res.append(concept_node)
#
# if len(res) == 0: # not recognized
# self.seek(init_pos)
# if unrecognized_tokens:
# unrecognized_tokens.add_token(self.get_token(), init_pos)
# else:
# unrecognized_tokens = UnrecognizedTokensNode(init_pos, init_pos, [self.get_token()])
#
# if not self.next_token(False):
# break
#
# else: # some concepts are recognized
# if unrecognized_tokens and unrecognized_tokens.not_whitespace():
# unrecognized_tokens.fix_source()
# concepts_found = core.utils.product(concepts_found, [unrecognized_tokens])
# has_unrecognized = True
# unrecognized_tokens = None
#
# res = self.get_bests(res) # only keep the concepts that eat the more tokens
# concepts_found = core.utils.product(concepts_found, res)
#
# # loop
# self.seek(res[0].end)
# if not self.next_token(False):
# break
#
# # Fix the source for unrecognized tokens
# if unrecognized_tokens and unrecognized_tokens.not_whitespace():
# unrecognized_tokens.fix_source()
# concepts_found = core.utils.product(concepts_found, [unrecognized_tokens])
# has_unrecognized = True
#
# # else
# returns as many ReturnValue as choices found
# ret = []
# for choice in concepts_found:
# ret.append(
# self.sheerka.ret(
# self.name,
# not has_unrecognized,
# self.sheerka.new(
# BuiltinConcepts.PARSER_RESULT,
# parser=self,
# source=parser_input,
# body=choice,
# try_parsed=choice)))
#
# if len(ret) == 1:
# self.log_result(context, parser_input, ret[0])
# return ret[0]
# else:
# self.log_multiple_results(context, parser_input, ret)
# return ret
#
# def finalize_concept(self, sheerka, template, underlying, init_empty_body=True):
# """
# Updates the properties of the concept
# Goes in recursion if the property is a concept
# """
#
# # this cache is to make sure that we return the same concept for the same ConceptExpression
# _underlying_value_cache = {}
#
# def _add_prop(_concept, prop_name, value):
# """
# Adds a new entry,
# makes a list if the property already exists
# """
# if prop_name not in _concept.compiled or _concept.compiled[prop_name] is None:
# # new entry
# _concept.compiled[prop_name] = value
# else:
# # make a list if there was a value
# previous_value = _concept.compiled[prop_name]
# if isinstance(previous_value, list):
# previous_value.append(value)
# else:
# new_value = [previous_value, value]
# _concept.compiled[prop_name] = new_value
#
# def _look_for_concept_match(_underlying):
# """
# At some point, there is either an StrMatch or a ConceptMatch,
# that allowed the recognition.
# Look for the ConceptMatch, with recursion if needed
# """
# if isinstance(_underlying.parsing_expression, ConceptExpression):
# return _underlying
#
# if not isinstance(_underlying, NonTerminalNode):
# return None
#
# if len(_underlying.children) != 1:
# return None
#
# return _look_for_concept_match(_underlying.children[0])
#
# def _get_underlying_value(_underlying):
# concept_match_node = _look_for_concept_match(_underlying)
# if concept_match_node:
# # the value is a concept
# if id(concept_match_node) in _underlying_value_cache:
# result = _underlying_value_cache[id(concept_match_node)]
# else:
# ref_tpl = concept_match_node.parsing_expression.concept
# result = self.finalize_concept(sheerka, ref_tpl, concept_match_node.children[0], init_empty_body)
# _underlying_value_cache[id(concept_match_node)] = result
# else:
# # the value is a string
# result = DoNotResolve(_underlying.source)
#
# return result
#
# def _process_rule_name(_concept, _underlying):
# if _underlying.parsing_expression.rule_name:
# value = _get_underlying_value(_underlying)
# _add_prop(_concept, _underlying.parsing_expression.rule_name, value)
# _concept.metadata.need_validation = True
#
# if isinstance(_underlying, NonTerminalNode):
# for child in _underlying.children:
# _process_rule_name(_concept, child)
#
# key = (template.key, template.id) if template.id else template.key
# concept = sheerka.new(key)
# if init_empty_body and concept.metadata.body is None:
# value = _get_underlying_value(underlying)
# concept.compiled[ConceptParts.BODY] = value
# if underlying.parsing_expression.rule_name:
# _add_prop(concept, underlying.parsing_expression.rule_name, value)
# # KSI : Why don't we set concept.metadata.need_validation to True ?
#
# if isinstance(underlying, NonTerminalNode):
# for node in underlying.children:
# _process_rule_name(concept, node)
#
# return concept
#
# def encode_grammar(self, grammar):
# """
# Transform the grammar into something that can easily be serialized
# :param grammar:
# :return:
# """
#
# def _encode(expression):
# if isinstance(expression, StrMatch):
# res = f"'{expression.to_match}'"
#
# elif isinstance(expression, ConceptExpression):
# res = core.utils.str_concept(expression.concept)
#
# elif isinstance(expression, Sequence):
# res = "(" + " ".join(_encode(c) for c in expression.nodes) + ")"
#
# elif isinstance(expression, OrderedChoice):
# res = "(" + "|".join(_encode(c) for c in expression.nodes) + ")"
#
# elif isinstance(expression, Optional):
# res = _encode(expression.nodes[0]) + "?"
#
# elif isinstance(expression, ZeroOrMore):
# res = _encode(expression.nodes[0]) + "*"
#
# elif isinstance(expression, OneOrMore):
# res = _encode(expression.nodes[0]) + "+"
#
# if expression.rule_name:
# res += "=" + expression.rule_name
#
# return res
#
# result = {}
# for k, v in grammar.items():
# key = core.utils.str_concept(k)
# value = _encode(v)
# result[key] = value
# return result
#
# @staticmethod
# def get_bests(results):
# """
# Returns the result that is the longest
# :param results:
# :return:
# """
# by_end_pos = defaultdict(list)
# for result in results:
# by_end_pos[result.end].append(result)
#
# return by_end_pos[max(by_end_pos)]
#
#
# class ParsingExpressionVisitor:
# """
# visit ParsingExpression
# """
#
# def visit(self, parsing_expression):
# name = parsing_expression.__class__.__name__
#
# method = 'visit_' + name
# visitor = getattr(self, method, self.generic_visit)
# return visitor(parsing_expression)
#
# def generic_visit(self, parsing_expression):
# if hasattr(self, "visit_all"):
# self.visit_all(parsing_expression)
#
# for node in parsing_expression.elements:
# if isinstance(node, Concept):
# self.visit(ConceptExpression(node.key or node.name))
# elif isinstance(node, str):
# self.visit(StrMatch(node))
# else:
# self.visit(node)
-108
@@ -1,108 +0,0 @@
# # try to match something like
# # ConceptNode 'plus' ConceptNode
# #
# # Replaced by SyaNodeParser
# from core.builtin_concepts import BuiltinConcepts
# from core.tokenizer import TokenKind, Token
# from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode
# from parsers.BaseParser import BaseParser
# from parsers.MultipleConceptsParser import MultipleConceptsParser
# from core.concept import VARIABLE_PREFIX
#
# multiple_concepts_parser = MultipleConceptsParser()
#
#
# class ConceptsWithConceptsParser(BaseParser):
# def __init__(self, **kwargs):
# super().__init__("ConceptsWithConcepts", 25)
# self.enabled = False
#
# @staticmethod
# def get_tokens(nodes):
# tokens = []
#
# for node in nodes:
# if isinstance(node, ConceptNode):
# index, line, column = node.tokens[0].index, node.tokens[0].line, node.tokens[0].column
# tokens.append(Token(TokenKind.CONCEPT, node.concept, index, line, column))
# else:
# for token in node.tokens:
# if token.type == TokenKind.EOF:
# break
# elif token.type in (TokenKind.NEWLINE, TokenKind.WHITESPACE):
# continue
# else:
# tokens.append(token)
#
# return tokens
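A stand-in sketch of that flattening step, with token kinds as plain strings rather than the project's TokenKind enum and nodes as dicts: concept nodes collapse to a single CONCEPT token, layout tokens are dropped, and EOF ends a raw run.

def flatten(nodes):
    tokens = []
    for node in nodes:
        if node["kind"] == "concept":
            tokens.append(("CONCEPT", node["concept"]))
        else:
            for kind, value in node["tokens"]:
                if kind == "EOF":
                    break
                if kind in ("NEWLINE", "WHITESPACE"):
                    continue
                tokens.append((kind, value))
    return tokens

print(flatten([
    {"kind": "concept", "concept": "sum"},
    {"kind": "raw", "tokens": [("WHITESPACE", " "), ("IDENTIFIER", "plus"), ("EOF", "")]},
]))
# -> [('CONCEPT', 'sum'), ('IDENTIFIER', 'plus')]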
#
# @staticmethod
# def get_key(nodes):
# key = ""
# index = 0
# for node in nodes:
# if key:
# key += " "
#
# if isinstance(node, UnrecognizedTokensNode):
# key += node.source.strip()
# else:
# key += f"{VARIABLE_PREFIX}{index}"
# index += 1
#
# return key
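A worked example of the key scheme, assuming VARIABLE_PREFIX is "$" (the real value lives in core.concept). Note that index advances on every node, so variable numbers can skip when unrecognized text sits between concepts, exactly as in the code above.

VARIABLE_PREFIX = "$"  # assumption; the real prefix comes from core.concept

class UnrecognizedTokensNode:
    def __init__(self, source):
        self.source = source

class ConceptNode:
    pass

def get_key(nodes):
    key = ""
    index = 0
    for node in nodes:
        if key:
            key += " "
        if isinstance(node, UnrecognizedTokensNode):
            key += node.source.strip()
        else:
            key += f"{VARIABLE_PREFIX}{index}"
        index += 1  # advances for every node, matching the original
    return key

# "x plus y" where x and y were recognized as concepts:
print(get_key([ConceptNode(), UnrecognizedTokensNode(" plus "), ConceptNode()]))
# -> "$0 plus $2"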
#
# def finalize_concept(self, context, concept, nodes):
# index = 0
# for node in nodes:
#
# if isinstance(node, ConceptNode):
# prop_name = list(concept.props.keys())[index]
# concept.compiled[prop_name] = node.concept
# context.log(
# f"Setting property '{prop_name}='{node.concept}'.",
# self.name)
# index += 1
# elif isinstance(node, SourceCodeNode):
# prop_name = list(concept.props.keys())[index]
# sheerka = context.sheerka
# value = sheerka.new(BuiltinConcepts.PARSER_RESULT, parser=self, source=node.source, body=node.node)
# concept.compiled[prop_name] = [context.sheerka.ret(self.name, True, value)]
# context.log(
# f"Setting property '{prop_name}'='Python({node.source})'.",
# self.name)
# index += 1
#
# return concept
#
# def parse(self, context, parser_input):
# sheerka = context.sheerka
# nodes = self.get_input_as_lexer_nodes(parser_input, multiple_concepts_parser)
# if not nodes:
# return None
#
# concept_key = self.get_key(nodes)
# concept = sheerka.new(concept_key)
# if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
# return sheerka.ret(
# self.name,
# False,
# sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.body))
#
# concepts = concept if hasattr(concept, "__iter__") else [concept]
# for concept in concepts:
# self.finalize_concept(context, concept, nodes)
#
# res = []
# for concept in concepts:
# res.append(sheerka.ret(
# self.name,
# True,
# sheerka.new(
# BuiltinConcepts.PARSER_RESULT,
# parser=self,
# source=parser_input.source,
# body=concept,
# try_parsed=None)))
#
# return res[0] if len(res) == 1 else res
-163
View File
@@ -1,163 +0,0 @@
# # to be replaced by SyaNodeParser
# import ast
#
# from core.builtin_concepts import BuiltinConcepts
# from core.tokenizer import TokenKind
# from parsers.BaseNodeParser import SourceCodeNode
# from parsers.BaseParser import BaseParser
# from parsers.BnfNodeParser import BnfNodeParser, UnrecognizedTokensNode, ConceptNode
# import core.utils
# from parsers.PythonParser import PythonParser
#
# concept_lexer_parser = BnfNodeParser()
#
#
# class MultipleConceptsParser(BaseParser):
# """
# Parser that will take the result of BnfNodeParser and
# try to resolve the unrecognized tokens token by token
#
# It is a success when it returns a list of ConceptNode objects exclusively
# """
#
# def __init__(self, **kwargs):
# BaseParser.__init__(self, "MultipleConcepts", 45)
# self.enabled = False
#
# @staticmethod
# def finalize(nodes_found, unrecognized_tokens):
# if not unrecognized_tokens:
# return nodes_found, unrecognized_tokens
#
# unrecognized_tokens.fix_source()
# if unrecognized_tokens.not_whitespace():
# nodes_found = core.utils.product(nodes_found, [unrecognized_tokens])
#
# return nodes_found, None
#
# @staticmethod
# def create_or_add(unrecognized_tokens, token, index):
# if unrecognized_tokens:
# unrecognized_tokens.add_token(token, index)
# else:
# unrecognized_tokens = UnrecognizedTokensNode(index, index, [token])
# return unrecognized_tokens
#
# def parse(self, context, parser_input):
# sheerka = context.sheerka
# nodes = self.get_input_as_lexer_nodes(parser_input, concept_lexer_parser)
# if not nodes:
# return None
#
# nodes_found = [[]]
# concepts_only = True
#
# for node in nodes:
# if isinstance(node, UnrecognizedTokensNode):
# unrecognized_tokens = None
# i = 0
#
# while i < len(node.tokens):
#
# token_index = node.start + i
# token = node.tokens[i]
#
# concepts_nodes = self.get_concepts_nodes(context, token_index, token)
# if concepts_nodes is not None:
# nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
# nodes_found = core.utils.product(nodes_found, concepts_nodes)
# i += 1
# continue
#
# source_code_node = self.get_source_code_node(context, token_index, node.tokens[i:])
# if source_code_node:
# nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
# nodes_found = core.utils.product(nodes_found, [source_code_node])
# i += len(source_code_node.tokens)
# continue
#
# # neither a concept nor source code
# unrecognized_tokens = self.create_or_add(unrecognized_tokens, token, token_index)
# concepts_only &= token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE)
# i += 1
#
# # finish processing if needed
# nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
#
# else:
# nodes_found = core.utils.product(nodes_found, [node])
#
# ret = []
# for choice in nodes_found:
# ret.append(
# sheerka.ret(
# self.name,
# concepts_only,
# sheerka.new(
# BuiltinConcepts.PARSER_RESULT,
# parser=self,
# source=parser_input.source,
# body=choice,
# try_parsed=None))
# )
#
# if len(ret) == 1:
# self.log_result(context, parser_input.source, ret[0])
# return ret[0]
# else:
# self.log_multiple_results(context, parser_input.source, ret)
# return ret
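The nodes_found bookkeeping is easiest to see with a stand-in for core.utils.product; its behavior is an assumption here (extend every partial interpretation with each alternative, multiplying the choice space):

def product(partials, alternatives):
    # Assumed semantics of core.utils.product: every partial interpretation
    # is copied and extended with each alternative.
    return [partial + [alt] for partial in partials for alt in alternatives]

nodes_found = [[]]
# a token that is ambiguous and matched two known concepts:
nodes_found = product(nodes_found, ["Concept(sum)", "Concept(sigma)"])
nodes_found = product(nodes_found, ["'plus'"])
print(nodes_found)
# [['Concept(sum)', "'plus'"], ['Concept(sigma)', "'plus'"]]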
#
# @staticmethod
# def get_concepts_nodes(context, index, token):
# """
# Tries to recognize a concept
# from the universe of all known concepts
# """
#
# if token.type != TokenKind.IDENTIFIER:
# return None
#
# concept = context.new_concept(token.value)
# if hasattr(concept, "__iter__") or context.sheerka.is_known(concept):
# concepts = concept if hasattr(concept, "__iter__") else [concept]
# concepts_nodes = [ConceptNode(c, index, index, [token], token.value) for c in concepts]
# return concepts_nodes
#
# return None
#
# @staticmethod
# def get_source_code_node(context, index, tokens):
# """
# Tries to recognize source code.
# For the time being, only Python is supported
# :param context:
# :param index:
# :param tokens:
# :return:
# """
#
# if len(tokens) == 0 or (len(tokens) == 1 and tokens[0].type == TokenKind.EOF):
# return None
#
# end_index = len(tokens)
# while end_index > 0:
# parser = PythonParser()
# tokens_to_parse = tokens[:end_index]
# res = parser.parse(context, tokens_to_parse)
# if res.status:
# # only expressions are accepted
# ast_ = res.value.value.ast_
# if not isinstance(ast_, ast.Expression):
# return None
# try:
# compiled = compile(ast_, "<string>", "eval")
# eval(compiled, {}, {})
# except Exception:
# return None
#
# source = BaseParser.get_text_from_tokens(tokens_to_parse)
# return SourceCodeNode(res.value.value, index, index + end_index - 1, tokens_to_parse, source)
# end_index -= 1
#
# return None
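The same shrinking-window idea can be sketched with the standard library alone. Tokens are plain strings here, and this simplification backs off on any failure instead of reproducing the early returns of the original:

import ast

def longest_python_expression(tokens):
    # Try the longest token span first, then back off until some prefix
    # parses, compiles, and evaluates as a pure Python expression.
    end = len(tokens)
    while end > 0:
        source = " ".join(tokens[:end])
        try:
            tree = ast.parse(source, mode="eval")  # expressions only
            eval(compile(tree, "<string>", "eval"), {}, {})
            return source
        except Exception:
            end -= 1
    return None

print(longest_python_expression(["1", "+", "2", "plus", "x"]))  # -> "1 + 2"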