Fixed SyaNodeParser false positive recognition issue

This commit is contained in:
2020-05-15 10:36:05 +02:00
parent 6e343ba996
commit 5489ef00b9
24 changed files with 484 additions and 5741 deletions
-18
View File
@@ -349,9 +349,6 @@ class EnumerationConcept(Concept):
self.set_value(ConceptParts.BODY, iteration) self.set_value(ConceptParts.BODY, iteration)
self.metadata.is_evaluated = True self.metadata.is_evaluated = True
# def __iter__(self):
# return iter(self.body)
class ListConcept(Concept): class ListConcept(Concept):
def __init__(self, items=None): def __init__(self, items=None):
@@ -362,21 +359,6 @@ class ListConcept(Concept):
def append(self, obj): def append(self, obj):
self.body.append(obj) self.body.append(obj)
# def __len__(self):
# return len(self.body)
#
# def __getitem__(self, key):
# return self.body[key]
#
# def __setitem__(self, key, value):
# self.body[key] = value
#
# def __iter__(self):
# return iter(self.body)
#
# def __contains__(self, item):
# return item in self.body
class FilteredConcept(Concept): class FilteredConcept(Concept):
def __init__(self, filtered=None, iterable=None, predicate=None): def __init__(self, filtered=None, iterable=None, predicate=None):
+1
View File
@@ -326,6 +326,7 @@ def ensure_evaluated(context, concept):
return evaluated return evaluated
def get_lexer_nodes_from_unrecognized(context, unrecognized_tokens_node, parsers): def get_lexer_nodes_from_unrecognized(context, unrecognized_tokens_node, parsers):
""" """
Using parsers, try to recognize concepts from source Using parsers, try to recognize concepts from source
+3 -3
View File
@@ -221,7 +221,7 @@ class Concept:
Create the key for this concept. Create the key for this concept.
Must be called only when the concept if fully initialized Must be called only when the concept if fully initialized
The method is not called set_key to make sure that no other class set the key by mistake The method is not called 'set_key' to make sure that no other class set the key by mistake
:param tokens: :param tokens:
:return: :return:
""" """
@@ -248,8 +248,8 @@ class Concept:
if token.value in variables: if token.value in variables:
key += VARIABLE_PREFIX + str(variables.index(token.value)) key += VARIABLE_PREFIX + str(variables.index(token.value))
else: else:
value = token.value[1:-1] if token.type == TokenKind.STRING else token.value #value = token.value[1:-1] if token.type == TokenKind.STRING else token.value
key += value key += token.value
first = False first = False
self.metadata.key = key self.metadata.key = key
@@ -56,12 +56,6 @@ class SheerkaCreateNewConcept:
return sheerka.ret(self.logger_name, False, ErrorConcept(init_ret_value.value)) return sheerka.ret(self.logger_name, False, ErrorConcept(init_ret_value.value))
resolved_concepts_by_first_keyword = init_ret_value.body resolved_concepts_by_first_keyword = init_ret_value.body
# update concept definition by key
# init_sya_ret_value = self.bnp.initialize(context, [concept], use_sheerka=True)
# if not init_sya_ret_value.status:
# return sheerka.ret(self.logger_name, False, ErrorConcept(init_sya_ret_value.value))
# concepts_by_first_keyword = init_sya_ret_value.body
concept.freeze_definition_hash() concept.freeze_definition_hash()
cache_manager.add_concept(concept) cache_manager.add_concept(concept)
@@ -74,21 +68,3 @@ class SheerkaCreateNewConcept:
# process the return if needed # process the return if needed
ret = sheerka.ret(self.logger_name, True, sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=concept)) ret = sheerka.ret(self.logger_name, True, sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=concept))
return ret return ret
# def load_concepts_nodes_definitions(self, context):
# """
# Gets from sdp what is need to parse nodes
# :return:
# """
# sdp = self.sheerka.sdp
#
# concepts_by_first_keyword = sdp.get(
# self.sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY,
# load_origin=False) or {}
#
# init_ret_value = self.bnp.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
# if not init_ret_value.status:
# return self.sheerka.ret(self.logger_name, False, ErrorConcept(init_ret_value.value))
# resolved_concepts_by_first_keyword = init_ret_value.body
#
# return concepts_by_first_keyword, resolved_concepts_by_first_keyword
+4 -3
View File
@@ -1,8 +1,9 @@
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
import core.utils import core.utils
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
NO_MATCH = "** No Match **" NO_MATCH = "** No Match **"
class SheerkaExecute: class SheerkaExecute:
""" """
Manage the execution of a process flow Manage the execution of a process flow
@@ -58,7 +59,8 @@ class SheerkaExecute:
# else "'" + BaseParser.get_text_from_tokens(to_parse) + "' as tokens" # else "'" + BaseParser.get_text_from_tokens(to_parse) + "' as tokens"
# execution_context.log(f"Parsing {debug_text}") # execution_context.log(f"Parsing {debug_text}")
with execution_context.push(desc=f"Parsing using {parser.name}", logger=parser.verbose_log) as sub_context: with execution_context.push(desc=f"Parsing using {parser.name}",
logger=parser.verbose_log) as sub_context:
sub_context.add_inputs(to_parse=to_parse) sub_context.add_inputs(to_parse=to_parse)
res = parser.parse(sub_context, to_parse) res = parser.parse(sub_context, to_parse)
if res is not None: if res is not None:
@@ -86,7 +88,6 @@ class SheerkaExecute:
stop_processing = True stop_processing = True
sub_context.add_values(return_values=res) sub_context.add_values(return_values=res)
if stop_processing: if stop_processing:
break # Do not try the other priorities if a match is found break # Do not try the other priorities if a match is found
@@ -35,7 +35,7 @@ class SheerkaModifyConcept:
# TODO : update concept by first keyword # TODO : update concept by first keyword
# TODO : update resolved by first keyword # TODO : update resolved by first keyword
# TODO : update concets grammars # TODO : update concepts grammars
ret = self.sheerka.ret(self.logger_name, True, self.sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=concept)) ret = self.sheerka.ret(self.logger_name, True, self.sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=concept))
return ret return ret
@@ -1,8 +1,6 @@
from dataclasses import dataclass from dataclasses import dataclass
from typing import List from typing import List
from sdp.sheerkaSerializer import Serializer
@dataclass @dataclass
class Variable: class Variable:
-48
View File
@@ -60,10 +60,6 @@ class Sheerka(Concept):
self.bnp = None # reference to the BaseNodeParser class (to compute first keyword token) self.bnp = None # reference to the BaseNodeParser class (to compute first keyword token)
# # Cache for concepts grammars
# # To be shared between BNFNode parsers instances
# self.concepts_grammars = {}
# a concept can be instantiated # a concept can be instantiated
# ex: File is a concept, but File('foo.txt') is an instance # ex: File is a concept, but File('foo.txt') is an instance
# TODO: manage contexts # TODO: manage contexts
@@ -303,27 +299,6 @@ class Sheerka(Concept):
res = self.bnp.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword) res = self.bnp.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
self.cache_manager.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, res.body) self.cache_manager.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, res.body)
# sya = self.bnf.resolve_sya_associativity_and_precedence()
# self.cache_manager.put(self.RESOLVED_CONCEPTS_SYA_DEFINITION_ENTRY, sya)
#
#
# self.concepts_by_first_keyword, \
# self.resolved_concepts_by_first_keyword = \
# self.create_new_concept_handler.load_concepts_nodes_definitions(context)
# self.concepts_by_first_keyword = self.sdp.get_safe(
# self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY,
# load_origin=False) or {}
#
# self.sya_definitions = self.sdp.get_safe(
# self.CONCEPTS_SYA_DEFINITION_ENTRY,
# load_origin=False) or {}
#
# init_ret_value = self.bnp.resolve_concepts_by_first_keyword(self, self.concepts_by_first_keyword)
# if not init_ret_value.status:
# return self.sheerka.ret(self.logger_name, False, ErrorConcept(init_ret_value.value))
# self.resolved_concepts_by_first_keyword = init_ret_value.body
def reset(self, cache_only=False): def reset(self, cache_only=False):
self.cache_manager.clear() self.cache_manager.clear()
self.cache_manager.cache_only = cache_only self.cache_manager.cache_only = cache_only
@@ -346,7 +321,6 @@ class Sheerka(Concept):
with ExecutionContext(self.key, event, self, f"Evaluating '{text}'", self.log) as execution_context: with ExecutionContext(self.key, event, self, f"Evaluating '{text}'", self.log) as execution_context:
user_input = self.ret(self.name, True, self.new(BuiltinConcepts.USER_INPUT, body=text, user_name=user_name)) user_input = self.ret(self.name, True, self.new(BuiltinConcepts.USER_INPUT, body=text, user_name=user_name))
reduce_requested = self.ret(self.name, True, self.new(BuiltinConcepts.REDUCE_REQUESTED)) reduce_requested = self.ret(self.name, True, self.new(BuiltinConcepts.REDUCE_REQUESTED))
# execution_context.local_hints.add(BuiltinConcepts.EVAL_WHERE_REQUESTED)
steps = [ steps = [
BuiltinConcepts.BEFORE_PARSING, BuiltinConcepts.BEFORE_PARSING,
@@ -525,28 +499,6 @@ class Sheerka(Concept):
return concept return concept
#
# def get(self, concept_key, concept_id=None):
# """
# Tries to find a concept
# What is return must be used a template for another concept.
# You must not modify the returned concept
# :param concept_key: key of the concept
# :param concept_id: when multiple concepts with the same key, use the id
# :return:
# """
#
# by_key = self.get_by_key(concept_key)
# if self.is_known(by_key):
# return by_key
#
# # else return by name
# by_name = self.get_by_name(concept_key)
# if self.is_known(by_name):
# return by_name
#
# return by_key # return not found for key
def get_by_key(self, concept_key, concept_id=None): def get_by_key(self, concept_key, concept_id=None):
concept_key = str(concept_key) if isinstance(concept_key, BuiltinConcepts) else concept_key concept_key = str(concept_key) if isinstance(concept_key, BuiltinConcepts) else concept_key
return self.internal_get("key", concept_key, self.CONCEPTS_BY_KEY_ENTRY, concept_id) return self.internal_get("key", concept_key, self.CONCEPTS_BY_KEY_ENTRY, concept_id)
+32 -2
View File
@@ -1,4 +1,4 @@
from dataclasses import dataclass from dataclasses import dataclass, field
from enum import Enum from enum import Enum
@@ -48,6 +48,7 @@ class TokenKind(Enum):
DEGREE = "degree" # ° DEGREE = "degree" # °
WORD = "word" WORD = "word"
EQUALSEQUALS = "==" EQUALSEQUALS = "=="
VAR_DEF = "__var__"
@dataclass() @dataclass()
@@ -58,6 +59,8 @@ class Token:
line: int line: int
column: int column: int
_str_value: str = field(default=None, repr=False, compare=False, hash=None)
def __repr__(self): def __repr__(self):
if self.type == TokenKind.IDENTIFIER: if self.type == TokenKind.IDENTIFIER:
value = str(self.value) value = str(self.value)
@@ -72,6 +75,23 @@ class Token:
return f"Token({value})" return f"Token({value})"
@property
def str_value(self):
if self._str_value:
return self._str_value
if self.type == TokenKind.STRING:
self._str_value = self.value[1:-1]
elif self.type == TokenKind.KEYWORD:
self._str_value = self.value.value
else:
self._str_value = str(self.value)
return self._str_value
@staticmethod
def is_whitespace(token):
return token and token.type == TokenKind.WHITESPACE
@dataclass() @dataclass()
class LexerError(Exception): class LexerError(Exception):
@@ -101,12 +121,13 @@ class Tokenizer:
KEYWORDS = set(x.value for x in Keywords) KEYWORDS = set(x.value for x in Keywords)
def __init__(self, text, parse_word=False): def __init__(self, text, yield_eof=True, parse_word=False):
self.text = text self.text = text
self.text_len = len(text) self.text_len = len(text)
self.column = 1 self.column = 1
self.line = 1 self.line = 1
self.i = 0 self.i = 0
self.yield_eof = yield_eof
self.parse_word = parse_word self.parse_word = parse_word
def __iter__(self): def __iter__(self):
@@ -134,6 +155,7 @@ class Tokenizer:
self.i += 1 self.i += 1
self.column += 1 self.column += 1
elif c == "_": elif c == "_":
from core.concept import VARIABLE_PREFIX
if self.i + 1 < self.text_len and self.text[self.i + 1].isalpha(): if self.i + 1 < self.text_len and self.text[self.i + 1].isalpha():
identifier = self.eat_identifier(self.i) identifier = self.eat_identifier(self.i)
token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER
@@ -141,6 +163,13 @@ class Tokenizer:
yield Token(token_type, value, self.i, self.line, self.column) yield Token(token_type, value, self.i, self.line, self.column)
self.i += len(identifier) self.i += len(identifier)
self.column += len(identifier) self.column += len(identifier)
elif self.i + 7 < self.text_len and \
self.text[self.i: self.i + 7] == VARIABLE_PREFIX and \
self.text[self.i + 7].isdigit():
number = self.eat_number(self.i + 7)
yield Token(TokenKind.VAR_DEF, VARIABLE_PREFIX + number, self.i, self.line, self.column)
self.i += 7 + len(number)
self.column += 7 + len(number)
else: else:
yield Token(TokenKind.UNDERSCORE, "_", self.i, self.line, self.column) yield Token(TokenKind.UNDERSCORE, "_", self.i, self.line, self.column)
self.i += 1 self.i += 1
@@ -308,6 +337,7 @@ class Tokenizer:
else: else:
raise LexerError(f"Unknown token '{c}'", self.text, self.i, self.line, self.column) raise LexerError(f"Unknown token '{c}'", self.text, self.i, self.line, self.column)
if self.yield_eof:
yield Token(TokenKind.EOF, "", self.i, self.line, self.column) yield Token(TokenKind.EOF, "", self.i, self.line, self.column)
def eat_concept(self, start, line, column): def eat_concept(self, start, line, column):
+2 -2
View File
@@ -91,7 +91,7 @@ class AtomConceptParserHelper:
self.debug.append(token) self.debug.append(token)
if self.expected_tokens[0] != BaseNodeParser.get_token_value(token): if self.expected_tokens[0] != token.str_value:
self.errors.append(UnexpectedTokenErrorNode( self.errors.append(UnexpectedTokenErrorNode(
f"Found '{token}' while expecting '{self.expected_tokens[0]}'", f"Found '{token}' while expecting '{self.expected_tokens[0]}'",
token, token,
@@ -119,7 +119,7 @@ class AtomConceptParserHelper:
forked.eat_concept(concept, pos) forked.eat_concept(concept, pos)
concept_node = ConceptNode(concept, pos, pos) concept_node = ConceptNode(concept, pos, pos)
expected = [BaseNodeParser.get_token_value(t) for t in Tokenizer(concept.name)][1:-1] expected = [t.str_value for t in Tokenizer(concept.name)][1:-1]
if not expected: if not expected:
# the concept is already matched # the concept is already matched
+20 -12
View File
@@ -53,9 +53,6 @@ class UnrecognizedTokensNode(LexerNode):
self.is_frozen = False self.is_frozen = False
self.parenthesis_count = 0 self.parenthesis_count = 0
def has_open_paren(self):
return self.parenthesis_count > 0
def add_token(self, token, pos): def add_token(self, token, pos):
if self.is_frozen: if self.is_frozen:
raise Exception("The node is frozen") raise Exception("The node is frozen")
@@ -78,6 +75,21 @@ class UnrecognizedTokensNode(LexerNode):
return self return self
def pop(self, token_kind):
if self.is_frozen:
raise Exception("The node is frozen")
if len(self.tokens) > 0 and self.tokens[-1].type == token_kind:
self.tokens.pop()
if len(self.tokens) == 0:
self.reset()
else:
self.end -= 1
def has_open_paren(self):
return self.parenthesis_count > 0
def not_whitespace(self): def not_whitespace(self):
return not self.is_whitespace() return not self.is_whitespace()
@@ -90,6 +102,11 @@ class UnrecognizedTokensNode(LexerNode):
def is_empty(self): def is_empty(self):
return len(self.tokens) == 0 return len(self.tokens) == 0
def last_token_type(self):
if len(self.tokens) == 0:
return None
return self.tokens[-1].type
def __eq__(self, other): def __eq__(self, other):
if isinstance(other, utnode): if isinstance(other, utnode):
return self.start == other.start and \ return self.start == other.start and \
@@ -676,15 +693,6 @@ class BaseNodeParser(BaseParser):
return custom_concepts if custom else None return custom_concepts if custom else None
@staticmethod
def get_token_value(token):
if token.type == TokenKind.STRING:
return token.value[1:-1]
elif token.type == TokenKind.KEYWORD:
return token.value.value
else:
return token.value
@staticmethod @staticmethod
def get_concepts_by_first_keyword(context, concepts, use_sheerka=False): def get_concepts_by_first_keyword(context, concepts, use_sheerka=False):
""" """
+189 -74
View File
@@ -1,15 +1,16 @@
from collections import namedtuple from collections import namedtuple
from dataclasses import dataclass, field from dataclasses import dataclass, field
from operator import attrgetter
from typing import List from typing import List
from core import builtin_helpers from core import builtin_helpers
from core.builtin_concepts import BuiltinConcepts from core.builtin_concepts import BuiltinConcepts
from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF from core.concept import Concept, DEFINITION_TYPE_BNF
from core.sheerka.ExecutionContext import ExecutionContext from core.sheerka.ExecutionContext import ExecutionContext
from core.tokenizer import Token, TokenKind from core.tokenizer import Token, TokenKind, Tokenizer
from parsers.BaseNodeParser import UnrecognizedTokensNode, ConceptNode, SourceCodeNode, SyaAssociativity, \ from parsers.BaseNodeParser import UnrecognizedTokensNode, ConceptNode, SourceCodeNode, SyaAssociativity, \
SourceCodeWithConceptNode, BaseNodeParser SourceCodeWithConceptNode, BaseNodeParser
from parsers.BaseParser import ErrorNode, UnexpectedTokenErrorNode from parsers.BaseParser import ErrorNode
PARSERS = ["BnfNode", "AtomNode", "Python"] PARSERS = ["BnfNode", "AtomNode", "Python"]
@@ -88,10 +89,13 @@ class SyaConceptParserHelper:
concept: Concept concept: Concept
start: int # position of the token in the tokenizer (Caution, it is not token.index) start: int # position of the token in the tokenizer (Caution, it is not token.index)
end: int = field(default=-1, repr=False, compare=False, hash=None) end: int = field(default=-1, repr=False, compare=False, hash=None)
expected: List[str] = field(default_factory=list, repr=False, compare=False, hash=None) expected: List[Token] = field(default_factory=list, repr=False, compare=False, hash=None)
expected_parameters_before_first_token: int = field(default=0, repr=False, compare=False, hash=None) expected_parameters_before_first_token: int = field(default=0, repr=False, compare=False, hash=None)
last_token_before_first_token: Token = field(default=None, repr=False, compare=False, hash=None)
potential_pos: int = field(default=-1, repr=False, compare=False, hash=None) potential_pos: int = field(default=-1, repr=False, compare=False, hash=None)
parameters_list_at_init: list = field(default_factory=list, repr=False, compare=False, hash=None) parameters_list_at_init: list = field(default_factory=list, repr=False, compare=False, hash=None)
tokens: List[Token] = field(default_factory=list, repr=False, compare=False, hash=None) # tokens eaten
remember_whitespace: Token = field(default=None, repr=False, compare=False, hash=None)
error: str = None error: str = None
def __post_init__(self): def __post_init__(self):
@@ -99,17 +103,20 @@ class SyaConceptParserHelper:
if self.end == -1: if self.end == -1:
self.end = self.start self.end = self.start
first_keyword_found = False first_keyword_found = None
for name in concept.key.split(): for token in Tokenizer(concept.key, yield_eof=False):
if not name.startswith(VARIABLE_PREFIX) and not first_keyword_found: if not first_keyword_found and token.type != TokenKind.WHITESPACE and token.type != TokenKind.VAR_DEF:
first_keyword_found = True first_keyword_found = token
if first_keyword_found: if first_keyword_found:
self.expected.append(name) self.expected.append(token)
else: else:
self.last_token_before_first_token = token
if token.type != TokenKind.WHITESPACE:
self.expected_parameters_before_first_token += 1 self.expected_parameters_before_first_token += 1
self.eat_token() # remove the fist token self.eat_token(first_keyword_found) # remove the first token
self.tokens.append(first_keyword_found)
def is_matched(self): def is_matched(self):
return len(self.expected) == 0 return len(self.expected) == 0
@@ -117,23 +124,38 @@ class SyaConceptParserHelper:
def is_atom(self): def is_atom(self):
return len(self.concept.concept.metadata.variables) == 0 and len(self.expected) == 0 return len(self.concept.concept.metadata.variables) == 0 and len(self.expected) == 0
def is_expected(self, token): def is_next(self, token):
if self.is_matched(): if self.is_matched() or len(self.expected) == 0:
return False return False
token_value = BaseNodeParser.get_token_value(token) # True if the next token is the one that is expected
# Or if the next token is a whitespace and the expected one is the one after
# (whitespace are sometimes not mandatory)
return token.str_value == self.expected[0].str_value or \
self.expected[0].type == TokenKind.WHITESPACE and token.str_value == self.expected[1].str_value
def is_expected(self, token):
if self.is_matched() or token.type == TokenKind.WHITESPACE:
return False
for expected in self.expected: for expected in self.expected:
if not expected.startswith(VARIABLE_PREFIX) and expected == token_value: if expected.type != TokenKind.VAR_DEF and expected.str_value == token.str_value:
return True return True
return False return False
def expected_parameters(self): def expected_parameters(self):
return sum(map(lambda e: e.startswith(VARIABLE_PREFIX), self.expected)) return sum(map(lambda e: e.type == TokenKind.VAR_DEF, self.expected))
def eat_token(self): def eat_token(self, until_token):
# No check, as it is used only after is_expected """
eat until token 'until'
:param until_token:
:return:
"""
# No check, as it is used only after is_expected() or is_next()
while self.expected[0].str_value != until_token.str_value:
del self.expected[0]
del self.expected[0] del self.expected[0]
# return True is a whole sequence of keyword is eaten # return True is a whole sequence of keyword is eaten
@@ -143,7 +165,10 @@ class SyaConceptParserHelper:
if len(self.expected) == 0: if len(self.expected) == 0:
return True return True
return self.expected[0].startswith(VARIABLE_PREFIX) # also return True at the end of a name sequence
# ... <var0> bar baz qux <var1>
# return True after 'qux', to indicate all the parameters from <var0> must be processed
return self.expected[0].type == TokenKind.VAR_DEF
def eat_parameter(self, parameter): def eat_parameter(self, parameter):
if self.is_matched() and parameter == self: if self.is_matched() and parameter == self:
@@ -153,7 +178,7 @@ class SyaConceptParserHelper:
self.error = "No more parameter expected" self.error = "No more parameter expected"
return return
if not self.expected[0].startswith(VARIABLE_PREFIX): if self.expected[0].type != TokenKind.VAR_DEF:
self.error = "Parameter was not expected" self.error = "Parameter was not expected"
return return
@@ -202,6 +227,7 @@ class InFixToPostFix:
self.errors = [] # Not quite sure that I can handle more than one error self.errors = [] # Not quite sure that I can handle more than one error
self.debug = [] self.debug = []
self.false_positives = [] # concepts that looks like known one, but not (for debug purpose)
self.forked = [] # use to fork InFixToPostFix when multiple parsers recognize the unrecognized_tokens self.forked = [] # use to fork InFixToPostFix when multiple parsers recognize the unrecognized_tokens
def __repr__(self): def __repr__(self):
@@ -245,7 +271,6 @@ class InFixToPostFix:
Note that when we are parsing non recognized tokens, Note that when we are parsing non recognized tokens,
we consider that the parenthesis are part of the non recognized we consider that the parenthesis are part of the non recognized
:param token: :param token:
:param stack:
:return: :return:
""" """
return isinstance(token, Token) and token.type == TokenKind.RPAR return isinstance(token, Token) and token.type == TokenKind.RPAR
@@ -268,10 +293,10 @@ class InFixToPostFix:
:return: :return:
""" """
if isinstance(item, SyaConceptParserHelper) and len(item.expected) > 0 and not item.error: if isinstance(item, SyaConceptParserHelper) and len(item.expected) > 0 and not item.error:
if item.expected[0].startswith(VARIABLE_PREFIX): if item.expected[0].type == TokenKind.VAR_DEF:
item.error = "Not enough suffix parameters" item.error = "Not enough suffix parameters"
else: else:
item.error = f"token '{item.expected[0]}' not found" item.error = f"token '{item.expected[0].str_value}' not found"
if isinstance(item, SyaConceptParserHelper) and item.potential_pos != -1: if isinstance(item, SyaConceptParserHelper) and item.potential_pos != -1:
self.out.insert(item.potential_pos, item) self.out.insert(item.potential_pos, item)
@@ -328,6 +353,16 @@ class InFixToPostFix:
).pseudo_fix_source() ).pseudo_fix_source()
return source_code return source_code
def _transform_to_unrecognized(self, parser_helper):
# an Unrecognized when sent to out too prematurely
if len(self.out) > 0 and isinstance(self.out[-1], UnrecognizedTokensNode):
self.unrecognized_tokens = self.out.pop()
if parser_helper.remember_whitespace:
self.unrecognized_tokens.add_token(parser_helper.remember_whitespace, parser_helper.start - 1)
for i, token in enumerate(parser_helper.tokens):
self.unrecognized_tokens.add_token(token, parser_helper.start + i)
def get_errors(self): def get_errors(self):
res = [] res = []
res.extend(self.errors) res.extend(self.errors)
@@ -343,28 +378,28 @@ class InFixToPostFix:
self.is_locked = False self.is_locked = False
def manage_parameters_when_new_concept(self, temp_concept_node): def manage_parameters_when_new_concept(self, parser_helper):
""" """
When a new concept is create, we need to check what to do with the parameters When a new concept is create, we need to check what to do with the parameters
that were queued that were queued
:param temp_concept_node: new concept :param parser_helper: new concept
:return: :return:
""" """
if len(self.parameters_list) < temp_concept_node.expected_parameters_before_first_token: if len(self.parameters_list) < parser_helper.expected_parameters_before_first_token:
# The new concept expect some prefix parameters, but there's not enough # The new concept expect some prefix parameters, but there's not enough
temp_concept_node.error = "Not enough prefix parameters" parser_helper.error = "Not enough prefix parameters"
return return
if len(self.parameters_list) > temp_concept_node.expected_parameters_before_first_token: if len(self.parameters_list) > parser_helper.expected_parameters_before_first_token:
# There are more parameters than needed by the new concept # There are more parameters than needed by the new concept
# The others are either # The others are either
# - parameters for the previous concept (if any) # - parameters for the previous concept (if any)
# - concepts on their own # - concepts on their own
# - syntax error # - syntax error
# In all the cases, the only thing that matter is to pop what is expected by the new concept # In all the cases, the only thing that matter is to pop what is expected by the new concept
for i in range(temp_concept_node.expected_parameters_before_first_token): for i in range(parser_helper.expected_parameters_before_first_token):
self.parameters_list.pop() self.parameters_list.pop()
temp_concept_node.parameters_list_at_init.extend(self.parameters_list) parser_helper.parameters_list_at_init.extend(self.parameters_list)
return return
# len(self.parameters_list) == temp_concept_node.expected_parameters_before_first_token # len(self.parameters_list) == temp_concept_node.expected_parameters_before_first_token
@@ -385,14 +420,18 @@ class InFixToPostFix:
:return: :return:
""" """
# manage parenthesis that didn't find any match
if self._is_lpar(self.stack[-1]):
self._add_error(ParenthesisMismatchErrorNode(self.stack[-1]))
# The parameter must be part the current concept being parsed # The parameter must be part the current concept being parsed
assert len(self._concepts()) != 0 # sanity check assert len(self._concepts()) != 0 # sanity check
current_concept = self._concepts()[-1] current_concept = self._concepts()[-1]
while len(current_concept.expected) > 0 and current_concept.expected[0].startswith(VARIABLE_PREFIX): while len(current_concept.expected) > 0 and current_concept.expected[0].type == TokenKind.VAR_DEF:
# eat everything that was expected # eat everything that was expected
if len(self.parameters_list) == 0: if len(self.parameters_list) == 0:
# current_concept.error = f"Failed to match parameter '{current_concept.expected[0]}'" current_concept.error = f"Failed to match parameter '{current_concept.expected[0].str_value}'"
return return
del self.parameters_list[0] del self.parameters_list[0]
del current_concept.expected[0] del current_concept.expected[0]
@@ -506,6 +545,11 @@ class InFixToPostFix:
if stack.associativity == SyaAssociativity.No and current.associativity == SyaAssociativity.No: if stack.associativity == SyaAssociativity.No and current.associativity == SyaAssociativity.No:
self._add_error(NoneAssociativeSequenceErrorNode(current.concept, stack_head.start, concept_node.start)) self._add_error(NoneAssociativeSequenceErrorNode(current.concept, stack_head.start, concept_node.start))
if not current.precedence:
# precedence is not set (None or zero)
# Do not apply any rule
return False
if current.associativity == SyaAssociativity.Left and current.precedence <= stack.precedence: if current.associativity == SyaAssociativity.Left and current.precedence <= stack.precedence:
return True return True
@@ -528,9 +572,55 @@ class InFixToPostFix:
:return: :return:
""" """
def _pop_stack(c):
while self.stack[-1] != c and not self._is_lpar(c):
self.pop_stack_to_out()
if self._is_lpar(self.stack[-1]):
self._add_error(ParenthesisMismatchErrorNode(self.stack[-1]))
return False
# Manage concepts ending with long names
if self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1].is_matched():
self.pop_stack_to_out()
for current_concept in reversed(self._concepts()): for current_concept in reversed(self._concepts()):
# As I may loose memory again ;-)
# it's a reversed loop to manage cases like
# if a plus b then ...
# The current concept is 'plus', but the token is 'then'
# It's means that I have finished to parse the 'plus' and started the second part of the 'if'
if current_concept.is_next(token):
current_concept.end = pos
current_concept.tokens.append(token)
if current_concept.eat_token(token):
_pop_stack(current_concept)
return True
if len(current_concept.expected) > 0 and current_concept.expected[0].type != TokenKind.VAR_DEF:
if current_concept.expected[0].type == TokenKind.WHITESPACE:
# drop it. It's the case where an optional whitespace is missing
del (current_concept.expected[0])
else:
# error
# We are not parsing the concept we tought we were parsing.
# Transform the eaten tokens into unrecognized
# and discard the current SyaConceptParserHelper
# TODO: manage the pending LPAR, RPAR ?
self._transform_to_unrecognized(current_concept)
self.false_positives.append(current_concept)
self.stack.pop()
return False
if current_concept.is_expected(token): if current_concept.is_expected(token):
# Fix the whitespace between var and expected if needed
# current_concept.expected[0] is '<var>'
# current_concept.expected[1] is what separate var from expected (normally a whitespace)
if current_concept.expected[1].type == TokenKind.WHITESPACE:
self.unrecognized_tokens.pop(TokenKind.WHITESPACE)
current_concept.end = pos current_concept.end = pos
self.manage_unrecognized() self.manage_unrecognized()
# manage that some clones may have been forked # manage that some clones may have been forked
@@ -550,36 +640,33 @@ class InFixToPostFix:
self.parameters_list[:])) self.parameters_list[:]))
return True # no need to continue return True # no need to continue
while self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1].is_matched(): while self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1] != current_concept:
current = self.stack[-1]
if current.error:
self._transform_to_unrecognized(current)
self.false_positives.append(current)
self.stack.pop()
if current_concept.expected[1].type == TokenKind.WHITESPACE:
self.unrecognized_tokens.pop(TokenKind.WHITESPACE)
self.manage_unrecognized()
# manage that some clones may have been forked
for forked in self.forked:
forked.handle_expected_token(token, pos)
else:
self.pop_stack_to_out() self.pop_stack_to_out()
self.manage_parameters() self.manage_parameters()
if current_concept.eat_token(): # maybe eat whitespace that was between <var> and expected token
while self.stack[-1] != current_concept and not self._is_lpar(current_concept): if current_concept.expected[0].type == TokenKind.WHITESPACE:
self.pop_stack_to_out() del current_concept.expected[0]
if self._is_lpar(self.stack[-1]): if current_concept.eat_token(token):
self._add_error(ParenthesisMismatchErrorNode(self.stack[-1])) _pop_stack(current_concept)
return False
# Manage concepts ending with long names
if self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1].is_matched():
self.pop_stack_to_out()
return True return True
# else:
# if token.type != TokenKind.WHITESPACE:
# # hack, because whitespaces are not correctly parsed in self.expected
# # KSI 2020/04/25
# # I no longer understand why we are in a loop (the reverse one)
# # if we are parsing a concept and the expected token does not match
# # The whole class should be in error
# self._add_error(UnexpectedTokenErrorNode(
# f"Failed to parse '{current_concept.concept.concept}'",
# token, current_concept.expected))
# return False
return False return False
def eat_token(self, token, pos): def eat_token(self, token, pos):
@@ -692,10 +779,11 @@ class InFixToPostFix:
return False return False
def eat_concept(self, sya_concept_def, pos): def eat_concept(self, sya_concept_def, token, pos):
""" """
a concept is found a concept is found
:param sya_concept_def: :param sya_concept_def:
:param token:
:param pos: :param pos:
:return: :return:
""" """
@@ -704,37 +792,43 @@ class InFixToPostFix:
return return
self.debug.append(sya_concept_def) self.debug.append(sya_concept_def)
temp_concept_node = SyaConceptParserHelper(sya_concept_def, pos) parser_helper = SyaConceptParserHelper(sya_concept_def, pos)
if self.unrecognized_tokens.last_token_type() == TokenKind.WHITESPACE:
parser_helper.remember_whitespace = self.unrecognized_tokens.tokens[-1]
if Token.is_whitespace(parser_helper.last_token_before_first_token):
self.unrecognized_tokens.pop(TokenKind.WHITESPACE)
# First, try to recognize the tokens that are waiting # First, try to recognize the tokens that are waiting
self.manage_unrecognized() self.manage_unrecognized()
for forked in self.forked: for forked in self.forked:
# manage the fact that some clone may have been forked # manage the fact that some clone may have been forked
forked.eat_concept(sya_concept_def, pos) forked.eat_concept(sya_concept_def, token, pos)
# then, check if this new concept is linked to the previous ones # then, check if this new concept is linked to the previous ones
# ie, is the previous concept fully matched ? # ie, is the previous concept fully matched ?
if temp_concept_node.expected_parameters_before_first_token == 0: if parser_helper.expected_parameters_before_first_token == 0:
# => does not expect pending parameter (it's suffixed concept) # => does not expect pending parameter (it's suffixed concept)
while self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1].potential_pos != -1: while self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1].potential_pos != -1:
# => previous seems to have everything it needs in the parameter list # => previous seems to have everything it needs in the parameter list
self.pop_stack_to_out() self.pop_stack_to_out()
if temp_concept_node.is_atom(): if parser_helper.is_atom():
self._put_to_out(temp_concept_node.fix_concept()) self._put_to_out(parser_helper.fix_concept())
else: else:
# call shunting yard algorithm # call shunting yard algorithm
while self.i_can_pop(temp_concept_node): while self.i_can_pop(parser_helper):
self.pop_stack_to_out() self.pop_stack_to_out()
if temp_concept_node.is_matched(): if parser_helper.is_matched():
# case of a prefix concept which has found happiness with self.parameters_list # case of a prefix concept which has found happiness with self.parameters_list
# directly put it in out # directly put it in out
self.manage_parameters_when_new_concept(temp_concept_node) self.manage_parameters_when_new_concept(parser_helper)
self._put_to_out(temp_concept_node.fix_concept()) self._put_to_out(parser_helper.fix_concept())
else: else:
self.stack.append(temp_concept_node) self.stack.append(parser_helper)
self.manage_parameters_when_new_concept(temp_concept_node) self.manage_parameters_when_new_concept(parser_helper)
def eat_unrecognized(self, token, pos): def eat_unrecognized(self, token, pos):
""" """
@@ -762,17 +856,33 @@ class InFixToPostFix:
if len(self.stack) == 0 and len(self.out) == 0: if len(self.stack) == 0 and len(self.out) == 0:
return # no need to pop the buffer, as no concept is found return # no need to pop the buffer, as no concept is found
while len(self.stack) > 0:
parser_helper = self.stack[-1]
# validate parenthesis
if self._is_lpar(parser_helper) or self._is_rpar(parser_helper):
self._add_error(ParenthesisMismatchErrorNode(parser_helper))
return None
self.manage_unrecognized() self.manage_unrecognized()
for forked in self.forked: for forked in self.forked:
# manage that some clones may have been forked # manage that some clones may have been forked
forked.finalize() forked.finalize()
while len(self.stack) > 0: failed_to_match = sum(map(lambda e: e.type != TokenKind.VAR_DEF, parser_helper.expected))
if self._is_lpar(self.stack[-1]) or self._is_rpar(self.stack[-1]): if failed_to_match > 0:
self._add_error(ParenthesisMismatchErrorNode(self.stack[-1])) # didn't manage to read all tokens.
return None # Transform them into unrecognized
self._transform_to_unrecognized(parser_helper)
self.false_positives.append(parser_helper)
self.stack.pop() # discard the parser helper
else:
self.pop_stack_to_out() # process it
self.pop_stack_to_out() self.manage_unrecognized()
for forked in self.forked:
# manage that some clones may have been forked
forked.finalize()
def clone(self): def clone(self):
clone = InFixToPostFix(self.context) clone = InFixToPostFix(self.context)
@@ -975,7 +1085,7 @@ class SyaNodeParser(BaseNodeParser):
try: try:
if token.type in (TokenKind.LPAR, TokenKind.RPAR): if token.type in (TokenKind.LPAR, TokenKind.RPAR):
# little optim, no need to get the concept when parenthesis # little optim, no need to lock, unlock or get the concept when parenthesis
for infix_to_postfix in res: for infix_to_postfix in res:
infix_to_postfix.eat_token(token, self.pos) infix_to_postfix.eat_token(token, self.pos)
continue continue
@@ -992,7 +1102,7 @@ class SyaNodeParser(BaseNodeParser):
if len(concepts) == 1: if len(concepts) == 1:
for infix_to_postfix in res: for infix_to_postfix in res:
infix_to_postfix.eat_concept(concepts[0], self.pos) infix_to_postfix.eat_concept(concepts[0], token, self.pos)
continue continue
# make the cartesian product # make the cartesian product
@@ -1001,7 +1111,7 @@ class SyaNodeParser(BaseNodeParser):
for concept in concepts: for concept in concepts:
clone = infix_to_postfix.clone() clone = infix_to_postfix.clone()
temp_res.append(clone) temp_res.append(clone)
clone.eat_concept(concept, self.pos) clone.eat_concept(concept, token, self.pos)
res = temp_res res = temp_res
finally: finally:
@@ -1100,6 +1210,11 @@ class SyaNodeParser(BaseNodeParser):
to_insert = item to_insert = item
sequence.insert(0, to_insert) sequence.insert(0, to_insert)
if has_unrecognized:
# Manage some sick cases where missing parenthesis mess the order or the sequence
# example "foo bar(one plus two"
sequence.sort(key=attrgetter("start"))
ret.append( ret.append(
self.sheerka.ret( self.sheerka.ret(
self.name, self.name,
-912
View File
@@ -1,912 +0,0 @@
# #####################################################################################################
# # This implementation of the parser is highly inspired by the arpeggio project (https://github.com/textX/Arpeggio)
# # I don't directly use the project, but it helped me figure out
# # what to do.
# # Dejanović I., Milosavljević G., Vaderna R.:
# # Arpeggio: A flexible PEG parser for Python,
# # Knowledge-Based Systems, 2016, 95, 71 - 74, doi:10.1016/j.knosys.2015.12.004
# #####################################################################################################
# from collections import namedtuple
# from dataclasses import dataclass
# from collections import defaultdict
# from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
# from core.concept import Concept, ConceptParts, DoNotResolve
# from core.tokenizer import TokenKind, Tokenizer, Token
# from parsers.BaseNodeParser import LexerNode, GrammarErrorNode, ConceptNode, UnrecognizedTokensNode
# from parsers.BaseParser import BaseParser, ErrorNode
# import core.utils
#
#
# class NonTerminalNode(LexerNode):
# """
# Returned by the BnfNodeParser
# """
#
# def __init__(self, parsing_expression, start, end, tokens, children=None):
# super().__init__(start, end, tokens)
# self.parsing_expression = parsing_expression
# self.children = children
#
# def __repr__(self):
# name = self.parsing_expression.rule_name or self.parsing_expression.__class__.__name__
# if len(self.children) > 0:
# sub_names = "(" + ",".join([repr(child) for child in self.children]) + ")"
# else:
# sub_names = ""
# return name + sub_names
#
# def __eq__(self, other):
# if not isinstance(other, NonTerminalNode):
# return False
#
# return self.parsing_expression == other.parsing_expression and \
# self.start == other.start and \
# self.end == other.end and \
# self.children == other.children
#
# def __hash__(self):
# return hash((self.parsing_expression, self.start, self.end, self.children))
#
#
# class TerminalNode(LexerNode):
# """
# Returned by the BnfNodeParser
# """
#
# def __init__(self, parsing_expression, start, end, value):
# super().__init__(start, end, source=value)
# self.parsing_expression = parsing_expression
# self.value = value
#
# def __repr__(self):
# name = self.parsing_expression.rule_name or ""
# return name + f"'{self.value}'"
#
# def __eq__(self, other):
# if not isinstance(other, TerminalNode):
# return False
#
# return self.parsing_expression == other.parsing_expression and \
# self.start == other.start and \
# self.end == other.end and \
# self.value == other.value
#
# def __hash__(self):
# return hash((self.parsing_expression, self.start, self.end, self.value))
#
#
# @dataclass()
# class UnknownConceptNode(ErrorNode):
# concept_key: str
#
#
# @dataclass()
# class TooManyConceptNode(ErrorNode):
# concept_key: str
#
#
# class ParsingExpression:
# def __init__(self, *args, **kwargs):
# self.elements = args
#
# nodes = kwargs.get('nodes', [])
# if not hasattr(nodes, '__iter__'):
# nodes = [nodes]
# self.nodes = nodes
#
# self.rule_name = kwargs.get('rule_name', '')
#
# def __eq__(self, other):
# if not isinstance(other, ParsingExpression):
# return False
#
# return self.rule_name == other.rule_name and self.elements == other.elements
#
# def __hash__(self):
# return hash((self.rule_name, self.elements))
#
# def parse(self, parser):
# return self._parse(parser)
#
# def add_rule_name_if_needed(self, text):
# return text + "=" + self.rule_name if self.rule_name else text
#
#
# class ConceptExpression(ParsingExpression):
# """
# Will match a concept
# It used only for rule definition
#
# When the grammar is created, it is replaced by the actual concept
# """
#
# def __init__(self, concept, rule_name=""):
# super().__init__(rule_name=rule_name)
# self.concept = concept
#
# def __repr__(self):
# return self.add_rule_name_if_needed(f"{self.concept}")
#
# def __eq__(self, other):
# if not super().__eq__(other):
# return False
#
# if not isinstance(other, ConceptExpression):
# return False
#
# if isinstance(self.concept, Concept):
# return self.concept.name == other.concept.name
#
# # when it's only the name of the concept
# return self.concept == other.concept
#
# def __hash__(self):
# return hash((self.concept, self.rule_name))
#
# @staticmethod
# def get_parsing_expression_from_name(name):
# tokens = Tokenizer(name)
# nodes = [StrMatch(core.utils.strip_quotes(token.value)) for token in list(tokens)[:-1]]
# if len(nodes) == 1:
# return nodes[0]
# else:
# sequence = Sequence(nodes)
# sequence.nodes = nodes
# return sequence
#
# def _parse(self, parser):
# to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept
# if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
# return None
#
# self.concept = to_match # Memoize
#
# if to_match not in parser.concepts_grammars:
# # Try to match the concept using its name
# expr = self.get_parsing_expression_from_name(to_match.name)
# node = expr.parse(parser)
# else:
# node = parser.concepts_grammars[to_match].parse(parser)
#
# if node is None:
# return None
#
# return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node])
#
#
# class ConceptGroupExpression(ConceptExpression):
# def _parse(self, parser):
# to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept
# if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
# return None
#
# self.concept = to_match # Memoize
#
# if to_match not in parser.concepts_grammars:
# concepts_in_group = parser.sheerka.get_set_elements(parser.context, self.concept)
# nodes = [ConceptExpression(c, rule_name=c.name) for c in concepts_in_group]
# expr = OrderedChoice(nodes)
# expr.nodes = nodes
# node = expr.parse(parser)
# else:
# node = parser.concepts_grammars[to_match].parse(parser)
#
# if node is None:
# return None
#
# return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node])
#
#
# class Sequence(ParsingExpression):
# """
# Will match sequence of parser expressions in exact order they are defined.
# """
#
# def _parse(self, parser):
# init_pos = parser.pos
# end_pos = parser.pos
#
# children = []
# for e in self.nodes:
# node = e.parse(parser)
# if node is None:
# return None
# else:
# if node.end != -1: # because returns -1 when no match
# children.append(node)
# end_pos = node.end
#
# return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)
#
# def __repr__(self):
# to_str = ", ".join(repr(n) for n in self.elements)
# return self.add_rule_name_if_needed(f"({to_str})")
#
#
# class OrderedChoice(ParsingExpression):
# """
# Will match one among multiple
# It will stop at the first match (so the order of definition is important)
# """
#
# def _parse(self, parser):
# init_pos = parser.pos
#
# for e in self.nodes:
# node = e.parse(parser)
# if node:
# return NonTerminalNode(self, init_pos, node.end, parser.tokens[init_pos: node.end + 1], [node])
#
# parser.seek(init_pos) # backtrack
#
# return None
#
# def __repr__(self):
# to_str = "| ".join(repr(n) for n in self.elements)
# return self.add_rule_name_if_needed(f"({to_str})")
#
#
# class Optional(ParsingExpression):
# """
# Will match or not the elements
# if many matches, will choose longest one
# If you need order, use Optional(OrderedChoice)
# """
#
# def _parse(self, parser):
# init_pos = parser.pos
# selected_node = NonTerminalNode(self, parser.pos, -1, [], []) # means that nothing is found
#
# for e in self.nodes:
# node = e.parse(parser)
# if node:
# if node.end > selected_node.end:
# selected_node = NonTerminalNode(
# self,
# node.start,
# node.end,
# parser.tokens[node.start: node.end + 1],
# [node])
#
# parser.seek(init_pos) # backtrack
#
# if selected_node.end != -1:
# parser.seek(selected_node.end)
# parser.next_token() # eat the tokens found
#
# return selected_node
#
# def __repr__(self):
# if len(self.elements) == 1:
# return f"{self.elements[0]}?"
# else:
# to_str = ", ".join(repr(n) for n in self.elements)
# return self.add_rule_name_if_needed(f"({to_str})?")
#
#
# class Repetition(ParsingExpression):
# """
# Base class for all repetition-like parser expressions (?,*,+)
# Args:
# eolterm(bool): Flag that indicates that end of line should
# terminate repetition match.
# """
#
# def __init__(self, *elements, **kwargs):
# super(Repetition, self).__init__(*elements, **kwargs)
# self.sep = kwargs.get('sep', None)
#
#
# class ZeroOrMore(Repetition):
# """
# ZeroOrMore will try to match parser expression specified zero or more
# times. It will never fail.
# """
#
# def _parse(self, parser):
# init_pos = parser.pos
# end_pos = -1
# children = []
#
# while True:
# current_pos = parser.pos
#
# # maybe eat the separator if needed
# if self.sep and children:
# sep_result = self.sep.parse(parser)
# if sep_result is None:
# parser.seek(current_pos)
# break
#
# # eat the ZeroOrMore
# node = self.nodes[0].parse(parser)
# if node is None:
# parser.seek(current_pos)
# break
# else:
# if node.end != -1: # because returns -1 when no match
# children.append(node)
# end_pos = node.end
#
# if len(children) == 0:
# return NonTerminalNode(self, init_pos, -1, [], [])
#
# return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)
#
# def __repr__(self):
# to_str = ", ".join(repr(n) for n in self.elements)
# return self.add_rule_name_if_needed(f"({to_str})*")
#
#
# class OneOrMore(Repetition):
# """
# OneOrMore will try to match parser expression specified one or more times.
# """
#
# def _parse(self, parser):
# init_pos = parser.pos
# end_pos = -1
# children = []
#
# while True:
# current_pos = parser.pos
#
# # maybe eat the separator if needed
# if self.sep and children:
# sep_result = self.sep.parse(parser)
# if sep_result is None:
# parser.seek(current_pos)
# break
#
# # eat the ZeroOrMore
# node = self.nodes[0].parse(parser)
# if node is None:
# parser.seek(current_pos)
# break
# else:
# if node.end != -1: # because returns -1 when no match
# children.append(node)
# end_pos = node.end
#
# if len(children) == 0: # if nothing is found, it's an error
# return None
#
# return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)
#
# def __repr__(self):
# to_str = ", ".join(repr(n) for n in self.elements)
# return self.add_rule_name_if_needed(f"({to_str})+")
#
#
# class UnorderedGroup(Repetition):
# """
# Will try to match all of the parsing expression in any order.
# """
#
# def _parse(self, parser):
# raise NotImplementedError()
#
# # def __repr__(self):
# # to_str = ", ".join(repr(n) for n in self.elements)
# # return f"({to_str})#"
#
#
# class Match(ParsingExpression):
# """
# Base class for all classes that will try to match something from the input.
# """
#
# def __init__(self, rule_name, root=False):
# super(Match, self).__init__(rule_name=rule_name, root=root)
#
# def parse(self, parser):
# result = self._parse(parser)
# return result
#
#
# class StrMatch(Match):
# """
# Matches a literal
# """
#
# def __init__(self, to_match, rule_name="", ignore_case=True):
# super(Match, self).__init__(rule_name=rule_name)
# self.to_match = to_match
# self.ignore_case = ignore_case
#
# def __repr__(self):
# return self.add_rule_name_if_needed(f"'{self.to_match}'")
#
# def __eq__(self, other):
# if not super().__eq__(other):
# return False
#
# if not isinstance(other, StrMatch):
# return False
#
# return self.to_match == other.to_match and self.ignore_case == other.ignore_case
#
# def _parse(self, parser):
# token = parser.get_token()
# m = str(token.value).lower() == self.to_match.lower() if self.ignore_case \
# else token.value == self.to_match
#
# if m:
# node = TerminalNode(self, parser.pos, parser.pos, token.value)
# parser.next_token()
# return node
#
# return None
#
#
# class BnfNodeParser(BaseParser):
# def __init__(self, **kwargs):
# super().__init__("BnfNode_old", 50)
# self.enabled = False
# if 'grammars' in kwargs:
# self.concepts_grammars = kwargs.get("grammars")
# elif 'sheerka' in kwargs:
# self.concepts_grammars = kwargs.get("sheerka").concepts_grammars
# else:
# self.concepts_grammars = {}
#
# self.ignore_case = True
#
# self.token = None
# self.pos = -1
# self.tokens = None
#
# self.context = None
# self.text = None
# self.sheerka = None
#
# def add_error(self, error, next_token=True):
# self.error_sink.append(error)
# if next_token:
# self.next_token()
# return error
#
# def reset_parser(self, context, text):
# self.context = context
# self.sheerka = context.sheerka
# self.text = text
#
# try:
# self.tokens = list(self.get_input_as_tokens(text))
# except core.tokenizer.LexerError as e:
# self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
# return False
#
# self.token = None
# self.pos = -1
# self.next_token(False)
# return True
#
# def get_token(self) -> Token:
# return self.token
#
# def next_token(self, skip_whitespace=True):
# if self.token and self.token.type == TokenKind.EOF:
# return False
#
# self.pos += 1
# self.token = self.tokens[self.pos]
#
# if skip_whitespace:
# while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
# self.pos += 1
# self.token = self.tokens[self.pos]
#
# return self.token.type != TokenKind.EOF
#
# def seek(self, pos):
# self.pos = pos
# self.token = self.tokens[self.pos]
# return True
#
# def rewind(self, offset, skip_whitespace=True):
# self.pos += offset
# self.token = self.tokens[self.pos]
#
# if skip_whitespace:
# while self.pos > 0 and (self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE):
# self.pos -= 1
# self.token = self.tokens[self.pos]
#
# def initialize(self, context, concepts_definitions):
# """
# Adds a bunch of concepts, and how they can be recognized
# :param context: execution context
# :param concepts_definitions: dictionary of concept, concept_definition
# :return:
# """
#
# self.context = context
# self.sheerka = context.sheerka
# concepts_to_resolve = set()
#
# for concept, concept_def in concepts_definitions.items():
# # ## Gets the grammars
# context.log(f"Resolving grammar for '{concept}'", context.who)
# concept.init_key() # make sure that the key is initialized
# grammar = self.get_model(concept_def, concepts_to_resolve)
# self.concepts_grammars[concept] = grammar
#
# if self.has_error:
# return self.sheerka.ret(self.name, False, self.error_sink)
#
# # ## Removes concepts with infinite recursions
# concepts_to_remove = self.detect_infinite_recursion(concepts_to_resolve)
# for concept in concepts_to_remove:
# concepts_to_resolve.remove(concept)
# del self.concepts_grammars[concept]
#
# if self.has_error:
# return self.sheerka.ret(self.name, False, self.error_sink)
# else:
# return self.sheerka.ret(self.name, True, self.concepts_grammars)
#
# def get_concept(self, concept_name):
# if concept_name in self.context.concepts:
# return self.context.concepts[concept_name]
# return self.sheerka.get_by_key(concept_name)
#
# def get_model(self, concept_def, concepts_to_resolve):
#
# # TODO
# # inner_get_model must not modify the initial ParsingExpression
# # A copy must be created
# def inner_get_model(expression):
# if isinstance(expression, Concept):
# if self.sheerka.isaset(self.context, expression):
# ret = ConceptGroupExpression(expression, rule_name=expression.name)
# else:
# ret = ConceptExpression(expression, rule_name=expression.name)
# concepts_to_resolve.add(expression)
# elif isinstance(expression, ConceptExpression): # it includes ConceptGroupExpression
# if expression.rule_name is None or expression.rule_name == "":
# expression.rule_name = expression.concept.name if isinstance(expression.concept, Concept) \
# else expression.concept
# if isinstance(expression.concept, str):
# concept = self.get_concept(expression.concept)
# if self.sheerka.is_known(concept):
# expression.concept = concept
# concepts_to_resolve.add(expression.concept)
# ret = expression
# elif isinstance(expression, str):
# ret = StrMatch(expression, ignore_case=self.ignore_case)
# elif isinstance(expression, StrMatch):
# ret = expression
# if ret.ignore_case is None:
# ret.ignore_case = self.ignore_case
# elif isinstance(expression, Sequence) or \
# isinstance(expression, OrderedChoice) or \
# isinstance(expression, ZeroOrMore) or \
# isinstance(expression, OneOrMore) or \
# isinstance(expression, Optional):
# ret = expression
# ret.nodes = [inner_get_model(e) for e in ret.elements]
# else:
# ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False)
#
# # Translate separator expression.
# if isinstance(expression, Repetition) and expression.sep:
# expression.sep = inner_get_model(expression.sep)
#
# return ret
#
# model = inner_get_model(concept_def)
#
# return model
#
# def detect_infinite_recursion(self, concepts_to_resolve):
#
# # infinite recursion matcher
# def _is_infinite_recursion(ref_concept, node):
# if isinstance(node, ConceptExpression):
# if node.concept == ref_concept:
# return True
#
# if isinstance(node.concept, str):
# to_match = self.get_concept(node.concept)
# if self.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
# return False
# else:
# to_match = node.concept
#
# if to_match not in self.concepts_grammars:
# return False
#
# return _is_infinite_recursion(ref_concept, self.concepts_grammars[to_match])
#
# if isinstance(node, OrderedChoice):
# return _is_infinite_recursion(ref_concept, node.nodes[0])
#
# if isinstance(node, Sequence):
# for node in node.nodes:
# if _is_infinite_recursion(ref_concept, node):
# return True
# return False
#
# return False
#
# removed_concepts = []
# for e in concepts_to_resolve:
# if isinstance(e, str):
# e = self.get_concept(e)
# if self.sheerka.isinstance(e, BuiltinConcepts.UNKNOWN_CONCEPT):
# continue
#
# if e not in self.concepts_grammars:
# continue
#
# to_resolve = self.concepts_grammars[e]
# if _is_infinite_recursion(e, to_resolve):
# removed_concepts.append(e)
# return removed_concepts
#
# def parse(self, context, parser_input):
# if parser_input == "":
# return context.sheerka.ret(
# self.name,
# False,
# context.sheerka.new(BuiltinConcepts.IS_EMPTY)
# )
#
# if not self.reset_parser(context, parser_input):
# return self.sheerka.ret(
# self.name,
# False,
# context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
#
# concepts_found = [[]]
# unrecognized_tokens = None
# has_unrecognized = False
#
# # actually list of list
# # The first dimension is the number of possibilities found
# # The second dimension is the number of concepts found, under one possibility
# #
# # Example 1
# # concept foo : 'one' 'two'
# # concept bar : 'one' 'two'
# # input 'one two' -> will produce two possibilities (foo and bar).
# #
# # Example 2
# # concept foo : 'one'
# # concept bar : 'two'
# # input 'one two' -> will produce one possibility which is (foo, bar) (foo then bar)
#
# while True:
# init_pos = self.pos
# res = []
#
# for concept, grammar in self.concepts_grammars.items():
# self.seek(init_pos)
# node = grammar.parse(self) # a node is TerminalNode or NonTerminalNode
# if node is not None and node.end != -1:
# updated_concept = self.finalize_concept(context.sheerka, concept, node)
# concept_node = ConceptNode(
# updated_concept,
# node.start,
# node.end,
# self.tokens[node.start: node.end + 1],
# None,
# node)
# res.append(concept_node)
#
# if len(res) == 0: # not recognized
# self.seek(init_pos)
# if unrecognized_tokens:
# unrecognized_tokens.add_token(self.get_token(), init_pos)
# else:
# unrecognized_tokens = UnrecognizedTokensNode(init_pos, init_pos, [self.get_token()])
#
# if not self.next_token(False):
# break
#
# else: # some concepts are recognized
# if unrecognized_tokens and unrecognized_tokens.not_whitespace():
# unrecognized_tokens.fix_source()
# concepts_found = core.utils.product(concepts_found, [unrecognized_tokens])
# has_unrecognized = True
# unrecognized_tokens = None
#
# res = self.get_bests(res) # only keep the concepts that eat the more tokens
# concepts_found = core.utils.product(concepts_found, res)
#
# # loop
# self.seek(res[0].end)
# if not self.next_token(False):
# break
#
# # Fix the source for unrecognized tokens
# if unrecognized_tokens and unrecognized_tokens.not_whitespace():
# unrecognized_tokens.fix_source()
# concepts_found = core.utils.product(concepts_found, [unrecognized_tokens])
# has_unrecognized = True
#
# # else
# # returns as many ReturnValue than choices found
# ret = []
# for choice in concepts_found:
# ret.append(
# self.sheerka.ret(
# self.name,
# not has_unrecognized,
# self.sheerka.new(
# BuiltinConcepts.PARSER_RESULT,
# parser=self,
# source=parser_input,
# body=choice,
# try_parsed=choice)))
#
# if len(ret) == 1:
# self.log_result(context, parser_input, ret[0])
# return ret[0]
# else:
# self.log_multiple_results(context, parser_input, ret)
# return ret
#
# def finalize_concept(self, sheerka, template, underlying, init_empty_body=True):
# """
# Updates the properties of the concept
# Goes in recursion if the property is a concept
# """
#
# # this cache is to make sure that we return the same concept for the same ConceptExpression
# _underlying_value_cache = {}
#
# def _add_prop(_concept, prop_name, value):
# """
# Adds a new entry,
# makes a list if the property already exists
# """
# if prop_name not in _concept.compiled or _concept.compiled[prop_name] is None:
# # new entry
# _concept.compiled[prop_name] = value
# else:
# # make a list if there was a value
# previous_value = _concept.compiled[prop_name]
# if isinstance(previous_value, list):
# previous_value.append(value)
# else:
# new_value = [previous_value, value]
# _concept.compiled[prop_name] = new_value
#
# def _look_for_concept_match(_underlying):
# """
# At some point, there is either an StrMatch or a ConceptMatch,
# that allowed the recognition.
# Look for the ConceptMatch, with recursion if needed
# """
# if isinstance(_underlying.parsing_expression, ConceptExpression):
# return _underlying
#
# if not isinstance(_underlying, NonTerminalNode):
# return None
#
# if len(_underlying.children) != 1:
# return None
#
# return _look_for_concept_match(_underlying.children[0])
#
# def _get_underlying_value(_underlying):
# concept_match_node = _look_for_concept_match(_underlying)
# if concept_match_node:
# # the value is a concept
# if id(concept_match_node) in _underlying_value_cache:
# result = _underlying_value_cache[id(concept_match_node)]
# else:
# ref_tpl = concept_match_node.parsing_expression.concept
# result = self.finalize_concept(sheerka, ref_tpl, concept_match_node.children[0], init_empty_body)
# _underlying_value_cache[id(concept_match_node)] = result
# else:
# # the value is a string
# result = DoNotResolve(_underlying.source)
#
# return result
#
# def _process_rule_name(_concept, _underlying):
# if _underlying.parsing_expression.rule_name:
# value = _get_underlying_value(_underlying)
# _add_prop(_concept, _underlying.parsing_expression.rule_name, value)
# _concept.metadata.need_validation = True
#
# if isinstance(_underlying, NonTerminalNode):
# for child in _underlying.children:
# _process_rule_name(_concept, child)
#
# key = (template.key, template.id) if template.id else template.key
# concept = sheerka.new(key)
# if init_empty_body and concept.metadata.body is None:
# value = _get_underlying_value(underlying)
# concept.compiled[ConceptParts.BODY] = value
# if underlying.parsing_expression.rule_name:
# _add_prop(concept, underlying.parsing_expression.rule_name, value)
# # KSI : Why don't we set concept.metadata.need_validation to True ?
#
# if isinstance(underlying, NonTerminalNode):
# for node in underlying.children:
# _process_rule_name(concept, node)
#
# return concept
#
# def encode_grammar(self, grammar):
# """
# Transform the grammar into something that can easily can be serialized
# :param grammar:
# :return:
# """
#
# def _encode(expression):
# if isinstance(expression, StrMatch):
# res = f"'{expression.to_match}'"
#
# elif isinstance(expression, ConceptExpression):
# res = core.utils.str_concept(expression.concept)
#
# elif isinstance(expression, Sequence):
# res = "(" + " ".join(_encode(c) for c in expression.nodes) + ")"
#
# elif isinstance(expression, OrderedChoice):
# res = "(" + "|".join(_encode(c) for c in expression.nodes) + ")"
#
# elif isinstance(expression, Optional):
# res = _encode(expression.nodes[0]) + "?"
#
# elif isinstance(expression, ZeroOrMore):
# res = _encode(expression.nodes[0]) + "*"
#
# elif isinstance(expression, OneOrMore):
# res = _encode(expression.nodes[0]) + "+"
#
# if expression.rule_name:
# res += "=" + expression.rule_name
#
# return res
#
# result = {}
# for k, v in grammar.items():
# key = core.utils.str_concept(k)
# value = _encode(v)
# result[key] = value
# return result
#
# @staticmethod
# def get_bests(results):
# """
# Returns the result that is the longest
# :param results:
# :return:
# """
# by_end_pos = defaultdict(list)
# for result in results:
# by_end_pos[result.end].append(result)
#
# return by_end_pos[max(by_end_pos)]
#
#
# class ParsingExpressionVisitor:
# """
# visit ParsingExpression
# """
#
# def visit(self, parsing_expression):
# name = parsing_expression.__class__.__name__
#
# method = 'visit_' + name
# visitor = getattr(self, method, self.generic_visit)
# return visitor(parsing_expression)
#
# def generic_visit(self, parsing_expression):
# if hasattr(self, "visit_all"):
# self.visit_all(parsing_expression)
#
# for node in parsing_expression.elements:
# if isinstance(node, Concept):
# self.visit(ConceptExpression(node.key or node.name))
# elif isinstance(node, str):
# self.visit(StrMatch(node))
# else:
# self.visit(node)
-108
View File
@@ -1,108 +0,0 @@
# # try to match something like
# # ConceptNode 'plus' ConceptNode
# #
# # Replaced by SyaNodeParser
# from core.builtin_concepts import BuiltinConcepts
# from core.tokenizer import TokenKind, Token
# from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode
# from parsers.BaseParser import BaseParser
# from parsers.MultipleConceptsParser import MultipleConceptsParser
# from core.concept import VARIABLE_PREFIX
#
# multiple_concepts_parser = MultipleConceptsParser()
#
#
# class ConceptsWithConceptsParser(BaseParser):
# def __init__(self, **kwargs):
# super().__init__("ConceptsWithConcepts", 25)
# self.enabled = False
#
# @staticmethod
# def get_tokens(nodes):
# tokens = []
#
# for node in nodes:
# if isinstance(node, ConceptNode):
# index, line, column = node.tokens[0].index, node.tokens[0].line, node.tokens[0].column
# tokens.append(Token(TokenKind.CONCEPT, node.concept, index, line, column))
# else:
# for token in node.tokens:
# if token.type == TokenKind.EOF:
# break
# elif token.type in (TokenKind.NEWLINE, TokenKind.WHITESPACE):
# continue
# else:
# tokens.append(token)
#
# return tokens
#
# @staticmethod
# def get_key(nodes):
# key = ""
# index = 0
# for node in nodes:
# if key:
# key += " "
#
# if isinstance(node, UnrecognizedTokensNode):
# key += node.source.strip()
# else:
# key += f"{VARIABLE_PREFIX}{index}"
# index += 1
#
# return key
#
# def finalize_concept(self, context, concept, nodes):
# index = 0
# for node in nodes:
#
# if isinstance(node, ConceptNode):
# prop_name = list(concept.props.keys())[index]
# concept.compiled[prop_name] = node.concept
# context.log(
# f"Setting property '{prop_name}='{node.concept}'.",
# self.name)
# index += 1
# elif isinstance(node, SourceCodeNode):
# prop_name = list(concept.props.keys())[index]
# sheerka = context.sheerka
# value = sheerka.new(BuiltinConcepts.PARSER_RESULT, parser=self, source=node.source, body=node.node)
# concept.compiled[prop_name] = [context.sheerka.ret(self.name, True, value)]
# context.log(
# f"Setting property '{prop_name}'='Python({node.source})'.",
# self.name)
# index += 1
#
# return concept
#
# def parse(self, context, parser_input):
# sheerka = context.sheerka
# nodes = self.get_input_as_lexer_nodes(parser_input, multiple_concepts_parser)
# if not nodes:
# return None
#
# concept_key = self.get_key(nodes)
# concept = sheerka.new(concept_key)
# if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
# return sheerka.ret(
# self.name,
# False,
# sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.body))
#
# concepts = concept if hasattr(concept, "__iter__") else [concept]
# for concept in concepts:
# self.finalize_concept(context, concept, nodes)
#
# res = []
# for concept in concepts:
# res.append(sheerka.ret(
# self.name,
# True,
# sheerka.new(
# BuiltinConcepts.PARSER_RESULT,
# parser=self,
# source=parser_input.source,
# body=concept,
# try_parsed=None)))
#
# return res[0] if len(res) == 1 else res
-163
View File
@@ -1,163 +0,0 @@
# # to be replaced by SyaNodeParser
# import ast
#
# from core.builtin_concepts import BuiltinConcepts
# from core.tokenizer import TokenKind
# from parsers.BaseNodeParser import SourceCodeNode
# from parsers.BaseParser import BaseParser
# from parsers.BnfNodeParser import BnfNodeParser, UnrecognizedTokensNode, ConceptNode
# import core.utils
# from parsers.PythonParser import PythonParser
#
# concept_lexer_parser = BnfNodeParser()
#
#
# class MultipleConceptsParser(BaseParser):
# """
# Parser that will take the result of BnfNodeParser and
# try to resolve the unrecognized tokens token by token
#
# It is a success when it returns a list ConceptNode exclusively
# """
#
# def __init__(self, **kwargs):
# BaseParser.__init__(self, "MultipleConcepts", 45)
# self.enabled = False
#
# @staticmethod
# def finalize(nodes_found, unrecognized_tokens):
# if not unrecognized_tokens:
# return nodes_found, unrecognized_tokens
#
# unrecognized_tokens.fix_source()
# if unrecognized_tokens.not_whitespace():
# nodes_found = core.utils.product(nodes_found, [unrecognized_tokens])
#
# return nodes_found, None
#
# @staticmethod
# def create_or_add(unrecognized_tokens, token, index):
# if unrecognized_tokens:
# unrecognized_tokens.add_token(token, index)
# else:
# unrecognized_tokens = UnrecognizedTokensNode(index, index, [token])
# return unrecognized_tokens
#
# def parse(self, context, parser_input):
# sheerka = context.sheerka
# nodes = self.get_input_as_lexer_nodes(parser_input, concept_lexer_parser)
# if not nodes:
# return None
#
# nodes_found = [[]]
# concepts_only = True
#
# for node in nodes:
# if isinstance(node, UnrecognizedTokensNode):
# unrecognized_tokens = None
# i = 0
#
# while i < len(node.tokens):
#
# token_index = node.start + i
# token = node.tokens[i]
#
# concepts_nodes = self.get_concepts_nodes(context, token_index, token)
# if concepts_nodes is not None:
# nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
# nodes_found = core.utils.product(nodes_found, concepts_nodes)
# i += 1
# continue
#
# source_code_node = self.get_source_code_node(context, token_index, node.tokens[i:])
# if source_code_node:
# nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
# nodes_found = core.utils.product(nodes_found, [source_code_node])
# i += len(source_code_node.tokens)
# continue
#
# # not a concept nor some source code
# unrecognized_tokens = self.create_or_add(unrecognized_tokens, token, token_index)
# concepts_only &= token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE)
# i += 1
#
# # finish processing if needed
# nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
#
# else:
# nodes_found = core.utils.product(nodes_found, [node])
#
# ret = []
# for choice in nodes_found:
# ret.append(
# sheerka.ret(
# self.name,
# concepts_only,
# sheerka.new(
# BuiltinConcepts.PARSER_RESULT,
# parser=self,
# source=parser_input.source,
# body=choice,
# try_parsed=None))
# )
#
# if len(ret) == 1:
# self.log_result(context, parser_input.source, ret[0])
# return ret[0]
# else:
# self.log_multiple_results(context, parser_input.source, ret)
# return ret
#
# @staticmethod
# def get_concepts_nodes(context, index, token):
# """
# Tries to recognize a concept
# from the univers of all known concepts
# """
#
# if token.type != TokenKind.IDENTIFIER:
# return None
#
# concept = context.new_concept(token.value)
# if hasattr(concept, "__iter__") or context.sheerka.is_known(concept):
# concepts = concept if hasattr(concept, "__iter__") else [concept]
# concepts_nodes = [ConceptNode(c, index, index, [token], token.value) for c in concepts]
# return concepts_nodes
#
# return None
#
# @staticmethod
# def get_source_code_node(context, index, tokens):
# """
# Tries to recognize source code.
# For the time being, only Python is supported
# :param context:
# :param tokens:
# :param index:
# :return:
# """
#
# if len(tokens) == 0 or (len(tokens) == 1 and tokens[0].type == TokenKind.EOF):
# return None
#
# end_index = len(tokens)
# while end_index > 0:
# parser = PythonParser()
# tokens_to_parse = tokens[:end_index]
# res = parser.parse(context, tokens_to_parse)
# if res.status:
# # only expression are accepted
# ast_ = res.value.value.ast_
# if not isinstance(ast_, ast.Expression):
# return None
# try:
# compiled = compile(ast_, "<string>", "eval")
# eval(compiled, {}, {})
# except Exception:
# return None
#
# source = BaseParser.get_text_from_tokens(tokens_to_parse)
# return SourceCodeNode(res.value.value, index, index + end_index - 1, tokens_to_parse, source)
# end_index -= 1
#
# return None
+2 -2
View File
@@ -12,10 +12,10 @@ from core.concept import Concept, ConceptParts, DEFINITION_TYPE_DEF
("foo", ["foo"], "foo"), ("foo", ["foo"], "foo"),
("foo a", ["foo"], "__var__0 a"), ("foo a", ["foo"], "__var__0 a"),
("foo a b", ["a"], "foo __var__0 b"), ("foo a b", ["a"], "foo __var__0 b"),
("'foo'", [], "foo"), ("'foo'", [], "'foo'"),
("my name is a", ["a"], "my name is __var__0"), ("my name is a", ["a"], "my name is __var__0"),
("a b c d", ["b", "c"], "a __var__0 __var__1 d"), ("a b c d", ["b", "c"], "a __var__0 __var__1 d"),
("a 'b c' d", ["b", "c"], "a b c d"), ("a 'b c' d", ["b", "c"], "a 'b c' d"),
("a | b", ["a", "b"], "__var__0 | __var__1"), ("a | b", ["a", "b"], "__var__0 | __var__1"),
("a b a c", ["a", "b"], "__var__0 __var__1 __var__0 c"), ("a b a c", ["a", "b"], "__var__0 __var__1 __var__0 c"),
("a b a c", ["b", "a"], "__var__1 __var__0 __var__1 c"), ("a b a c", ["b", "a"], "__var__1 __var__0 __var__1 c"),
+16 -2
View File
@@ -4,7 +4,7 @@ from core.tokenizer import Tokenizer, Token, TokenKind, LexerError, Keywords
def test_i_can_tokenize(): def test_i_can_tokenize():
source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\"=|&<>c:name:" source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\"=|&<>c:name:"
source += "$£€!_identifier°~_^\\`==#" source += "$£€!_identifier°~_^\\`==#__var__10"
tokens = list(Tokenizer(source)) tokens = list(Tokenizer(source))
assert tokens[0] == Token(TokenKind.PLUS, "+", 0, 1, 1) assert tokens[0] == Token(TokenKind.PLUS, "+", 0, 1, 1)
assert tokens[1] == Token(TokenKind.STAR, "*", 1, 1, 2) assert tokens[1] == Token(TokenKind.STAR, "*", 1, 1, 2)
@@ -54,8 +54,9 @@ def test_i_can_tokenize():
assert tokens[45] == Token(TokenKind.BACK_QUOTE, '`', 108, 6, 50) assert tokens[45] == Token(TokenKind.BACK_QUOTE, '`', 108, 6, 50)
assert tokens[46] == Token(TokenKind.EQUALSEQUALS, '==', 109, 6, 51) assert tokens[46] == Token(TokenKind.EQUALSEQUALS, '==', 109, 6, 51)
assert tokens[47] == Token(TokenKind.HASH, '#', 111, 6, 53) assert tokens[47] == Token(TokenKind.HASH, '#', 111, 6, 53)
assert tokens[48] == Token(TokenKind.VAR_DEF, '__var__10', 112, 6, 54)
assert tokens[48] == Token(TokenKind.EOF, '', 112, 6, 54) assert tokens[49] == Token(TokenKind.EOF, '', 121, 6, 63)
@pytest.mark.parametrize("text, expected", [ @pytest.mark.parametrize("text, expected", [
@@ -88,6 +89,19 @@ def test_i_can_parse_word(text):
assert tokens[1].index == len(text) assert tokens[1].index == len(text)
@pytest.mark.parametrize("text", [
"__var__0",
"__var__1",
"__var__10",
"__var__999",
])
def test_i_can_parse_var_def(text):
tokens = list(Tokenizer(text))
assert len(tokens) == 2
assert tokens[0].type == TokenKind.VAR_DEF
assert tokens[0].value == text
@pytest.mark.parametrize("text, message, error_text, index, line, column", [ @pytest.mark.parametrize("text, message, error_text, index, line, column", [
("'string", "Missing Trailing quote", "'string", 7, 1, 8), ("'string", "Missing Trailing quote", "'string", 7, 1, 8),
('"string', "Missing Trailing quote", '"string', 7, 1, 8), ('"string', "Missing Trailing quote", '"string', 7, 1, 8),
+2 -2
View File
@@ -36,9 +36,9 @@ def compute_debug_array(res):
if token.type == TokenKind.WHITESPACE: if token.type == TokenKind.WHITESPACE:
continue continue
else: else:
res_debug.append(token.value) res_debug.append("T(" + token.value + ")")
else: else:
res_debug.append(token.concept.name) res_debug.append("C(" + token.concept.name + ")")
to_compare.append(res_debug) to_compare.append(res_debug)
return to_compare return to_compare
+2 -2
View File
@@ -218,8 +218,8 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("text, expected", [ @pytest.mark.parametrize("text, expected", [
("hello foo bar", ("hello foo bar",
[ [
(True, [CNC("hello1", source="hello foo ", a=" foo "), "bar"]), (True, [CNC("hello1", source="hello foo ", a="foo "), "bar"]),
(True, [CNC("hello2", source="hello foo ", b=" foo "), "bar"]), (True, [CNC("hello2", source="hello foo ", b="foo "), "bar"]),
]), ]),
]) ])
def test_i_can_parse_when_unrecognized_yield_multiple_values(self, text, expected): def test_i_can_parse_when_unrecognized_yield_multiple_values(self, text, expected):
File diff suppressed because it is too large Load Diff
@@ -1,193 +0,0 @@
# import ast
#
# import pytest
#
# from core.builtin_concepts import ParserResultConcept, ReturnValueConcept, BuiltinConcepts
# from core.concept import Concept
# from core.tokenizer import Token, TokenKind, Tokenizer
# from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode
# from parsers.ConceptsWithConceptsParser import ConceptsWithConceptsParser
# from parsers.MultipleConceptsParser import MultipleConceptsParser
# from parsers.PythonParser import PythonNode
#
# from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
#
# multiple_concepts_parser = MultipleConceptsParser()
#
#
# def ret_val(*args):
# result = []
# index = 0
# source = ""
# for item in args:
# if isinstance(item, Concept):
# tokens = [Token(TokenKind.IDENTIFIER, item.name, 0, 0, 0)]
# result.append(ConceptNode(item, index, index, tokens, item.name))
# index += 1
# source += item.name
# elif isinstance(item, PythonNode):
# tokens = list(Tokenizer(item.source))[:-1] # strip trailing EOF
# result.append(SourceCodeNode(item, index, index + len(tokens) - 1, tokens, item.source))
# index += len(tokens)
# source += item.source
# else:
# tokens = list(Tokenizer(item))[:-1] # strip trailing EOF
# result.append(UnrecognizedTokensNode(index, index + len(tokens) - 1, tokens))
# index += len(tokens)
# source += item
#
# return ReturnValueConcept(
# "who",
# False,
# ParserResultConcept(parser=multiple_concepts_parser, value=result, source=source))
#
#
# class TestConceptsWithConceptsParser(TestUsingMemoryBasedSheerka):
#
# def init(self, concepts, inputs):
# context = self.get_context()
# for concept in concepts:
# context.sheerka.create_new_concept(context, concept)
#
# return context, ret_val(*inputs)
#
# def execute(self, concepts, inputs):
# context, input_return_values = self.init(concepts, inputs)
#
# parser = ConceptsWithConceptsParser()
# result = parser.parse(context, input_return_values.body)
#
# wrapper = result.body
# return_value = result.body.body
#
# return context, parser, result, wrapper, return_value
#
# @pytest.mark.parametrize("text, interested", [
# ("not parser result", False),
# (ParserResultConcept(parser="not multiple_concepts_parser"), False),
# (ParserResultConcept(parser=multiple_concepts_parser, value=[UnrecognizedTokensNode(0, 0, [])]), True),
# ])
# def test_not_interested(self, text, interested):
# context = self.get_context()
#
# res = ConceptsWithConceptsParser().parse(context, text)
# if interested:
# assert res is not None
# else:
# assert res is None
#
# def test_i_can_parse_composition_of_concepts(self):
# foo = Concept("foo")
# bar = Concept("bar")
# plus = Concept("a plus b").def_var("a").def_var("b")
#
# context, parser, result, wrapper, return_value = self.execute([foo, bar, plus], [foo, " plus ", bar])
#
# assert result.status
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
# assert result.who == wrapper.parser.name
# assert wrapper.source == "foo plus bar"
# assert context.sheerka.isinstance(return_value, plus)
#
# assert return_value.compiled["a"] == foo
# assert return_value.compiled["b"] == bar
#
# # sanity check, I can evaluate the result
# evaluated = context.sheerka.evaluate_concept(self.get_context(context.sheerka, True), return_value)
# assert evaluated.key == return_value.key
# assert evaluated.get_prop("a") == foo.init_key()
# assert evaluated.get_prop("b") == bar.init_key()
#
# def test_i_can_parse_when_composition_of_source_code(self):
# plus = Concept("a plus b", body="a + b").def_var("a").def_var("b")
# left = PythonNode("1+1", ast.parse("1+1", mode="eval"))
# right = PythonNode("2+2", ast.parse("2+2", mode="eval"))
# context, parser, result, wrapper, return_value = self.execute([plus], [left, " plus ", right])
#
# assert result.status
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
# assert result.who == wrapper.parser.name
# assert wrapper.source == "1+1 plus 2+2"
# assert context.sheerka.isinstance(return_value, plus)
#
# left_parser_result = ParserResultConcept(parser=parser, source="1+1", value=left)
# right_parser_result = ParserResultConcept(parser=parser, source="2+2", value=right)
# assert return_value.compiled["a"] == [ReturnValueConcept(parser.name, True, left_parser_result)]
# assert return_value.compiled["b"] == [ReturnValueConcept(parser.name, True, right_parser_result)]
#
# # sanity check, I can evaluate the result
# evaluated = context.sheerka.evaluate_concept(self.get_context(context.sheerka, True), return_value)
# assert evaluated.key == return_value.key
# assert evaluated.get_prop("a") == 2
# assert evaluated.get_prop("b") == 4
# assert evaluated.body == 6
#
# def test_i_can_parse_when_mix_of_concept_and_code(self):
# plus = Concept("a plus b").def_var("a").def_var("b")
# code = PythonNode("1+1", ast.parse("1+1", mode="eval"))
# foo = Concept("foo")
# context, parser, result, wrapper, return_value = self.execute([plus, foo], [foo, " plus ", code])
#
# assert result.status
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
# assert result.who == wrapper.parser.name
# assert wrapper.source == "foo plus 1+1"
# assert context.sheerka.isinstance(return_value, plus)
#
# code_parser_result = ParserResultConcept(parser=parser, source="1+1", value=code)
# assert return_value.compiled["a"] == foo
# assert return_value.compiled["b"] == [ReturnValueConcept(parser.name, True, code_parser_result)]
#
# # sanity check, I can evaluate the result
# evaluated = context.sheerka.evaluate_concept(self.get_context(context.sheerka, True), return_value)
# assert evaluated.key == return_value.key
# assert evaluated.get_prop("a") == foo.init_key()
# assert evaluated.get_prop("b") == 2
#
# def test_i_can_parse_when_multiple_concepts_are_recognized(self):
# foo = Concept("foo")
# bar = Concept("bar")
# plus_1 = Concept("a plus b", body="body1").def_var("a").def_var("b")
# plus_2 = Concept("a plus b", body="body2").def_var("a").def_var("b")
#
# context, input_return_values = self.init([foo, bar, plus_1, plus_2], [foo, " plus ", bar])
# parser = ConceptsWithConceptsParser()
# result = parser.parse(context, input_return_values.body)
#
# assert len(result) == 2
#
# res = result[0]
# wrapper = res.value
# return_value = res.value.value
# assert res.status
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
# assert res.who == wrapper.parser.name
# assert wrapper.source == "foo plus bar"
# assert context.sheerka.isinstance(return_value, plus_1)
# assert return_value.compiled["a"] == foo
# assert return_value.compiled["b"] == bar
#
# res = result[1]
# wrapper = res.value
# return_value = res.value.value
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
# assert res.who == wrapper.parser.name
# assert wrapper.source == "foo plus bar"
# assert context.sheerka.isinstance(return_value, plus_2)
# assert return_value.compiled["a"] == foo
# assert return_value.compiled["b"] == bar
#
# def test_i_cannot_parse_when_unknown_concept(self):
# foo = Concept("foo")
# bar = Concept("bar")
#
# context, input_return_values = self.init([foo, bar], [foo, " plus ", bar])
# parser = ConceptsWithConceptsParser()
# result = parser.parse(context, input_return_values.body)
# wrapper = result.body
# return_value = result.body.body
#
# assert not result.status
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.NOT_FOR_ME)
# assert result.who == parser.name
# assert return_value == input_return_values.body.body
@@ -1,216 +0,0 @@
# import pytest
#
# from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
# from core.concept import Concept
# from core.tokenizer import Tokenizer, TokenKind, Token
# from parsers.BaseNodeParser import cnode, scnode, utnode, SourceCodeNode, ConceptNode
# from parsers.BnfNodeParser import BnfNodeParser, Sequence
# from parsers.MultipleConceptsParser import MultipleConceptsParser
# from parsers.PythonParser import PythonNode
#
# from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
#
#
# def get_return_value(context, grammar, expression):
# parser = BnfNodeParser()
# parser.initialize(context, grammar)
#
# ret_val = parser.parse(context, expression)
# assert not ret_val.status
# return ret_val
#
#
# class TestMultipleConceptsParser(TestUsingMemoryBasedSheerka):
#
# def init(self, concepts, grammar, expression):
# context = self.get_context()
# for c in concepts:
# context.sheerka.create_new_concept(context, c)
# return_value = get_return_value(context, grammar, expression)
#
# return context, return_value
#
# def test_not_interested_if_not_parser_result(self):
# context = self.get_context()
# text = "not parser result"
#
# res = MultipleConceptsParser().parse(context, text)
# assert res is None
#
# def test_not_interested_if_not_from_concept_lexer_parser(self):
# context = self.get_context()
# text = ParserResultConcept(parser="not concept lexer", value="some value")
#
# res = MultipleConceptsParser().parse(context, text)
# assert res is None
#
# def test_i_can_parse_exact_concepts(self):
# foo = Concept("foo", body="'foo'")
# bar = Concept("bar", body="'bar'")
# baz = Concept("baz", body="'baz'")
# grammar = {}
# context, return_value = self.init([foo, bar, baz], grammar, "bar foo baz")
#
# parser = MultipleConceptsParser()
# ret_val = parser.parse(context, return_value.body)
#
# assert ret_val.status
# assert ret_val.who == parser.name
# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
# assert ret_val.value.value == [
# ConceptNode(bar, 0, 0, source="bar"),
# ConceptNode(foo, 2, 2, source="foo"),
# ConceptNode(baz, 4, 4, source="baz")]
# assert ret_val.value.source == "bar foo baz"
#
# def test_i_can_parse_when_ending_with_bnf(self):
# foo = Concept("foo", body="'foo'")
# bar = Concept("bar", body="'bar'")
# grammar = {foo: Sequence("foo1", "foo2", "foo3")}
# context, return_value = self.init([foo, bar], grammar, "bar foo1 foo2 foo3")
#
# parser = MultipleConceptsParser()
# ret_val = parser.parse(context, return_value.body)
#
# assert ret_val.status
# assert ret_val.who == parser.name
# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
# assert ret_val.value.value == [cnode("bar", 0, 0, "bar"), cnode("foo", 2, 6, "foo1 foo2 foo3")]
# assert ret_val.value.source == "bar foo1 foo2 foo3"
#
# def test_i_can_parse_when_starting_with_bnf(self):
# foo = Concept("foo", body="'foo'")
# bar = Concept("bar", body="'bar'")
# grammar = {foo: Sequence("foo1", "foo2", "foo3")}
# context, return_value = self.init([foo, bar], grammar, "foo1 foo2 foo3 bar")
#
# parser = MultipleConceptsParser()
# ret_val = parser.parse(context, return_value.body)
#
# assert ret_val.status
# assert ret_val.who == parser.name
# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
# assert ret_val.value.value == [cnode("foo", 0, 4, "foo1 foo2 foo3"), cnode("bar", 6, 6, "bar")]
# assert ret_val.value.source == "foo1 foo2 foo3 bar"
#
# def test_i_can_parse_when_concept_are_mixed(self):
# foo = Concept("foo")
# bar = Concept("bar")
# baz = Concept("baz")
# grammar = {foo: Sequence("foo1", "foo2", "foo3")}
# context, return_value = self.init([foo, bar, baz], grammar, "baz foo1 foo2 foo3 bar")
#
# parser = MultipleConceptsParser()
# ret_val = parser.parse(context, return_value.body)
#
# assert ret_val.status
# assert ret_val.who == parser.name
# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
# assert ret_val.value.value == [
# cnode("baz", 0, 0, "baz"),
# cnode("foo", 2, 6, "foo1 foo2 foo3"),
# cnode("bar", 8, 8, "bar")]
# assert ret_val.value.source == "baz foo1 foo2 foo3 bar"
#
# def test_i_can_parse_when_multiple_concepts_are_matching(self):
# foo = Concept("foo")
# bar = Concept("bar", body="bar1")
# baz = Concept("bar", body="bar2")
# grammar = {foo: "foo"}
# context, return_value = self.init([foo, bar, baz], grammar, "foo bar")
#
# parser = MultipleConceptsParser()
# ret_val = parser.parse(context, return_value.body)
#
# assert len(ret_val) == 2
# assert ret_val[0].status
# assert ret_val[0].value.value == [cnode("foo", 0, 0, "foo"), cnode("bar", 2, 2, "bar")]
# assert ret_val[0].value.source == "foo bar"
# assert ret_val[0].value.value[1].concept.metadata.body == "bar1"
#
# assert ret_val[1].status
# assert ret_val[1].value.value == [cnode("foo", 0, 0, "foo"), cnode("bar", 2, 2, "bar")]
# assert ret_val[1].value.source == "foo bar"
# assert ret_val[1].value.value[1].concept.metadata.body == "bar2"
#
# def test_i_can_parse_when_source_code(self):
# foo = Concept("foo")
# grammar = {foo: "foo"}
# context, return_value = self.init([foo], grammar, "1 foo")
#
# parser = MultipleConceptsParser()
# ret_val = parser.parse(context, return_value.body)
# wrapper = ret_val.value
# value = ret_val.value.value
#
# assert ret_val.status
# assert ret_val.who == parser.name
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
# assert wrapper.source == "1 foo"
# assert value == [
# scnode(0, 1, "1 "),
# cnode("foo", 2, 2, "foo")]
#
# def test_i_cannot_parse_when_unrecognized_token(self):
# twenty_two = Concept("twenty two")
# one = Concept("one")
# grammar = {twenty_two: Sequence("twenty", "two")}
# context, return_value = self.init([twenty_two, one], grammar, "twenty two + one")
#
# parser = MultipleConceptsParser()
# ret_val = parser.parse(context, return_value.body)
#
# assert not ret_val.status
# assert ret_val.who == parser.name
# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
# assert ret_val.value.value == [
# cnode("twenty two", 0, 2, "twenty two"),
# utnode(3, 5, " + "),
# cnode("one", 6, 6, "one")
# ]
# assert ret_val.value.source == "twenty two + one"
#
# def test_i_cannot_parse_when_unknown_concepts(self):
# twenty_two = Concept("twenty two")
# one = Concept("one")
# grammar = {twenty_two: Sequence("twenty", "two")}
# context, return_value = self.init([twenty_two, one], grammar, "twenty two plus one")
#
# parser = MultipleConceptsParser()
# ret_val = parser.parse(context, return_value.body)
#
# assert not ret_val.status
# assert ret_val.who == parser.name
# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
# assert ret_val.value.value == [
# cnode("twenty two", 0, 2, "twenty two"),
# utnode(3, 5, " plus "),
# cnode("one", 6, 6, "one")
# ]
# assert ret_val.value.source == "twenty two plus one"
#
# @pytest.mark.parametrize("text, expected_source, expected_end", [
# ("True", "True", 0),
# ("1 == 1", "1 == 1", 4),
# ("1!xdf", "1", 0),
# ("1", "1", 0),
# ])
# def test_i_can_get_source_code_node(self, text, expected_source, expected_end):
# tokens = list(Tokenizer(text))[:-1] # strip trailing EOF
#
# start_index = 5 # a random number different of zero
# res = MultipleConceptsParser().get_source_code_node(self.get_context(), start_index, tokens)
#
# assert isinstance(res, SourceCodeNode)
# assert isinstance(res.node, PythonNode)
# assert res.source == expected_source
# assert res.start == start_index
# assert res.end == start_index + expected_end
#
# def test_i_cannot_parse_null_text(self):
# res = MultipleConceptsParser().get_source_code_node(self.get_context(), 0, [])
# assert res is None
#
# eof = Token(TokenKind.EOF, "", 0, 0, 0)
# res = MultipleConceptsParser().get_source_code_node(self.get_context(), 0, [eof])
# assert res is None
+204 -327
View File
@@ -31,6 +31,7 @@ cmap = {
"if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c"), "if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c"),
"square": Concept("square(a)").def_var("a"), "square": Concept("square(a)").def_var("a"),
"foo bar": Concept("foo bar(a)").def_var("a"), "foo bar": Concept("foo bar(a)").def_var("a"),
"long infixed": Concept("a long infixed b").def_var("a").def_var("b"),
"twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
} }
@@ -50,8 +51,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
TestSyaNodeParser.sheerka.force_sya_def(context, [ TestSyaNodeParser.sheerka.force_sya_def(context, [
(cmap["plus"].id, 5, SyaAssociativity.Right), (cmap["plus"].id, 5, SyaAssociativity.Right),
(cmap["mult"].id, 10, SyaAssociativity.Right), (cmap["mult"].id, 10, SyaAssociativity.Right),
(cmap["minus"].id, 10, SyaAssociativity.Right), (cmap["minus"].id, 10, SyaAssociativity.Right)])
(cmap["square"].id, None, SyaAssociativity.No)])
def init_parser(self, def init_parser(self,
my_concepts_map=None, my_concepts_map=None,
@@ -98,99 +98,92 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("expression, expected_sequences", [ @pytest.mark.parametrize("expression, expected_sequences", [
("one plus two", [["one", "two", "plus"]]), ("one plus two", [["one", "two", "plus"]]),
("1 + 1 plus two", [["1 + 1 ", "two", "plus"]]), ("1 + 1 plus two", [["1 + 1", "two", "plus"]]),
("one + two plus three", [ ("one + two plus three", [
["one", " + ", "two", "three", "plus"], ["one", " + ", "two", "three", "plus"],
["one + two ", "three", "plus"]]), ["one + two", "three", "plus"]]),
("twenty one plus two", [ ("twenty one plus two", [
["twenty ", "one", "two", "plus"], ["twenty ", "one", "two", "plus"],
[short_cnode("twenties", "twenty one"), "two", "plus"] [short_cnode("twenties", "twenty one"), "two", "plus"]
]), ]),
("x$!# plus two", [["x$!# ", "two", "plus"]]), ("x$!# plus two", [["x$!#", "two", "plus"]]),
("one plus 1 + 1", [["one", " 1 + 1", "plus"]]), ("one plus 1 + 1", [["one", "1 + 1", "plus"]]),
("1 + 1 plus 2 + 2", [["1 + 1 ", " 2 + 2", "plus"]]), ("1 + 1 plus 2 + 2", [["1 + 1", "2 + 2", "plus"]]),
("one + two plus 1 + 1", [ ("one + two plus 1 + 1", [
["one", " + ", "two", " 1 + 1", "plus"], ["one", " + ", "two", "1 + 1", "plus"],
["one + two ", " 1 + 1", "plus"] ["one + two", "1 + 1", "plus"]
]), ]),
("twenty one plus 1 + 1", [ ("twenty one plus 1 + 1", [
["twenty ", "one", " 1 + 1", "plus"], ["twenty ", "one", "1 + 1", "plus"],
[cnode("twenties", 0, 2, "twenty one"), " 1 + 1", "plus"] [cnode("twenties", 0, 2, "twenty one"), "1 + 1", "plus"]
]), ]),
("x$!# plus 1 + 1", [["x$!# ", " 1 + 1", "plus"]]), ("x$!# plus 1 + 1", [["x$!#", "1 + 1", "plus"]]),
("one plus two + three", [ ("one plus two + three", [
["one", "two", "plus", " + ", "three"], ["one", "two", "plus", " + ", "three"],
["one", " two + three", "plus"], ["one", "two + three", "plus"],
]), ]),
("1 + 1 plus two + three", [ ("1 + 1 plus two + three", [
["1 + 1 ", "two", "plus", (" + ", 1), "three"], ["1 + 1", "two", "plus", (" + ", 1), "three"],
["1 + 1 ", " two + three", "plus"], ["1 + 1", "two + three", "plus"],
]), ]),
("one + two plus two + three", [ ("one + two plus two + three", [
["one", " + ", "two", ("two", 1), "plus", (" + ", 1), "three"], ["one", " + ", "two", ("two", 1), "plus", (" + ", 1), "three"],
["one + two ", ("two", 1), "plus", (" + ", 1), "three"], ["one + two", ("two", 1), "plus", (" + ", 1), "three"],
["one", " + ", "two", " two + three", "plus"], ["one", " + ", "two", "two + three", "plus"],
["one + two ", " two + three", "plus"], ["one + two", "two + three", "plus"],
]), ]),
("twenty one plus two + three", [ ("twenty one plus two + three", [
["twenty ", "one", "two", "plus", " + ", "three"], ["twenty ", "one", "two", "plus", " + ", "three"],
[cnode("twenties", 0, 2, "twenty one"), "two", "plus", " + ", "three"], [cnode("twenties", 0, 2, "twenty one"), "two", "plus", " + ", "three"],
["twenty ", "one", " two + three", "plus"], ["twenty ", "one", "two + three", "plus"],
[cnode("twenties", 0, 2, "twenty one"), " two + three", "plus"], [cnode("twenties", 0, 2, "twenty one"), "two + three", "plus"],
]), ]),
("x$!# plus two + three", [ ("x$!# plus two + three", [
["x$!# ", "two", "plus", " + ", "three"], ["x$!#", "two", "plus", " + ", "three"],
["x$!# ", " two + three", "plus"], ["x$!#", "two + three", "plus"],
]), ]),
("one plus twenty two", [ ("one plus twenty two", [
["one", " twenty ", "plus", "two"], ["one", "twenty ", "plus", "two"],
["one", cnode("twenties", 4, 6, "twenty two"), "plus"], ["one", cnode("twenties", 4, 6, "twenty two"), "plus"],
]), ]),
("1 + 1 plus twenty one", [ ("1 + 1 plus twenty one", [
["1 + 1 ", " twenty ", "plus", "one"], ["1 + 1", "twenty ", "plus", "one"],
["1 + 1 ", cnode("twenties", 8, 10, "twenty one"), "plus"], ["1 + 1", cnode("twenties", 8, 10, "twenty one"), "plus"],
]), ]),
("one + two plus twenty one", [ ("one + two plus twenty one", [
["one", " + ", "two", " twenty ", "plus", ("one", 1)], ["one", " + ", "two", "twenty ", "plus", ("one", 1)],
["one + two ", " twenty ", "plus", ("one", 1)], ["one + two", "twenty ", "plus", ("one", 1)],
["one", " + ", "two", cnode("twenties", 8, 10, "twenty one"), "plus"], ["one", " + ", "two", cnode("twenties", 8, 10, "twenty one"), "plus"],
["one + two ", cnode("twenties", 8, 10, "twenty one"), "plus"], ["one + two", cnode("twenties", 8, 10, "twenty one"), "plus"],
]), ]),
("twenty one plus twenty two", ("twenty one plus twenty two",
[ [
["twenty ", "one", " twenty ", "plus", "two"], ["twenty ", "one", ("twenty ", 1), "plus", "two"],
[cnode("twenties", 0, 2, "twenty one"), " twenty ", "plus", "two"], [cnode("twenties", 0, 2, "twenty one"), ("twenty ", 1), "plus", "two"],
["twenty ", "one", cnode("twenties", 6, 8, "twenty two"), "plus"], ["twenty ", "one", cnode("twenties", 6, 8, "twenty two"), "plus"],
[cnode("twenties", 0, 2, "twenty one"), cnode("twenties", 6, 8, "twenty two"), "plus"], [cnode("twenties", 0, 2, "twenty one"), cnode("twenties", 6, 8, "twenty two"), "plus"],
]), ]),
("x$!# plus twenty two", [ ("x$!# plus twenty two", [
["x$!# ", " twenty ", "plus", "two"], ["x$!#", "twenty ", "plus", "two"],
["x$!# ", cnode("twenties", 7, 9, "twenty two"), "plus"] ["x$!#", cnode("twenties", 7, 9, "twenty two"), "plus"]
]), ]),
("one plus z$!#", [["one", " z$!#", "plus"]]), ("one plus z$!#", [["one", "z$!#", "plus"]]),
("1 + 1 plus z$!#", [["1 + 1 ", " z$!#", "plus"]]), ("1 + 1 plus z$!#", [["1 + 1", "z$!#", "plus"]]),
("one + two plus z$!#", [ ("one + two plus z$!#", [
["one", " + ", "two", " z$!#", "plus"], ["one", " + ", "two", "z$!#", "plus"],
["one + two ", " z$!#", "plus"], ["one + two", "z$!#", "plus"],
]), ]),
("twenty one plus z$!#", [ ("twenty one plus z$!#", [
["twenty ", "one", " z$!#", "plus"], ["twenty ", "one", "z$!#", "plus"],
[cnode("twenties", 0, 2, "twenty one"), " z$!#", "plus"], [cnode("twenties", 0, 2, "twenty one"), "z$!#", "plus"],
]), ]),
("x$!# plus z$!#", [["x$!# ", " z$!#", "plus"]]), ("x$!# plus z$!#", [["x$!#", "z$!#", "plus"]]),
]) ])
def test_i_can_post_fix_simple_infix_concepts(self, expression, expected_sequences): def test_i_can_post_fix_simple_infix_concepts(self, expression, expected_sequences):
# concepts_map = {
# "plus": Concept("a plus b").def_var("a").def_var("b"),
# "one": Concept("one"),
# "two": Concept("two"),
# "three": Concept("three"),
# "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
# }
sheerka, context, parser = self.init_parser() sheerka, context, parser = self.init_parser()
res = parser.infix_to_postfix(context, expression) res = parser.infix_to_postfix(context, expression)
@@ -202,10 +195,10 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
assert res_i.out == expected_array assert res_i.out == expected_array
@pytest.mark.parametrize("expression, expected_sequences", [ @pytest.mark.parametrize("expression, expected_sequences", [
("one plus plus plus 1 + 1", [["one", " 1 + 1", "plus plus plus"]]), ("one plus plus plus 1 + 1", [["one", "1 + 1", "plus plus plus"]]),
("x$!# another long name infix twenty two", [ ("x$!# another long name infix twenty two", [
["x$!# ", " twenty ", "another long name infix", "two"], ["x$!#", "twenty ", "another long name infix", "two"],
["x$!# ", cnode("twenties", 13, 15, "twenty two"), "another long name infix"], ["x$!#", cnode("twenties", 13, 15, "twenty two"), "another long name infix"],
]), ]),
]) ])
def test_i_can_post_fix_infix_concepts_with_long_name(self, expression, expected_sequences): def test_i_can_post_fix_infix_concepts_with_long_name(self, expression, expected_sequences):
@@ -229,24 +222,18 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("expression, expected_sequences", [ @pytest.mark.parametrize("expression, expected_sequences", [
("one prefixed", [["one", "prefixed"]]), ("one prefixed", [["one", "prefixed"]]),
("1 + 1 prefixed", [["1 + 1 ", "prefixed"]]), ("1 + 1 prefixed", [["1 + 1", "prefixed"]]),
("one + two prefixed", [ ("one + two prefixed", [
["one", " + ", "two", "prefixed"], ["one", " + ", "two", "prefixed"],
["one + two ", "prefixed"], ["one + two", "prefixed"],
]), ]),
("twenty one prefixed", [ ("twenty one prefixed", [
["twenty ", "one", "prefixed"], ["twenty ", "one", "prefixed"],
[cnode("twenties", 0, 2, "twenty one"), "prefixed"], [cnode("twenties", 0, 2, "twenty one"), "prefixed"],
]), ]),
("x$!# prefixed", [["x$!# ", "prefixed"]]), ("x$!# prefixed", [["x$!#", "prefixed"]]),
]) ])
def test_i_can_post_fix_simple_prefixed_concepts(self, expression, expected_sequences): def test_i_can_post_fix_simple_prefixed_concepts(self, expression, expected_sequences):
# concepts_map = {
# "prefixed": Concept("a prefixed").def_var("a"),
# "one": Concept("one"),
# "two": Concept("two"),
# "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
# }
sheerka, context, parser = self.init_parser() sheerka, context, parser = self.init_parser()
res = parser.infix_to_postfix(context, expression) res = parser.infix_to_postfix(context, expression)
@@ -259,28 +246,28 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("expression, expected_sequences", [ @pytest.mark.parametrize("expression, expected_sequences", [
("one prefixed prefixed", [["one", "prefixed prefixed"]]), ("one prefixed prefixed", [["one", "prefixed prefixed"]]),
("1 + 1 prefixed prefixed", [["1 + 1 ", "prefixed prefixed"]]), ("1 + 1 prefixed prefixed", [["1 + 1", "prefixed prefixed"]]),
("one + two prefixed prefixed", [ ("one + two prefixed prefixed", [
["one", " + ", "two", "prefixed prefixed"], ["one", " + ", "two", "prefixed prefixed"],
["one + two ", "prefixed prefixed"], ["one + two", "prefixed prefixed"],
]), ]),
("twenty one prefixed prefixed", [ ("twenty one prefixed prefixed", [
["twenty ", "one", "prefixed prefixed"], ["twenty ", "one", "prefixed prefixed"],
[cnode("twenties", 0, 2, "twenty one"), "prefixed prefixed"], [cnode("twenties", 0, 2, "twenty one"), "prefixed prefixed"],
]), ]),
("x$!# prefixed prefixed", [["x$!# ", "prefixed prefixed"]]), ("x$!# prefixed prefixed", [["x$!#", "prefixed prefixed"]]),
("one long name prefixed", [["one", "long name prefixed"]]), ("one long name prefixed", [["one", "long name prefixed"]]),
("1 + 1 long name prefixed", [["1 + 1 ", "long name prefixed"]]), ("1 + 1 long name prefixed", [["1 + 1", "long name prefixed"]]),
("one + two long name prefixed", [ ("one + two long name prefixed", [
["one", " + ", "two", "long name prefixed"], ["one", " + ", "two", "long name prefixed"],
["one + two ", "long name prefixed"], ["one + two", "long name prefixed"],
]), ]),
("twenty one long name prefixed", [ ("twenty one long name prefixed", [
["twenty ", "one", "long name prefixed"], ["twenty ", "one", "long name prefixed"],
[cnode("twenties", 0, 2, "twenty one"), "long name prefixed"], [cnode("twenties", 0, 2, "twenty one"), "long name prefixed"],
]), ]),
("x$!# long name prefixed", [["x$!# ", "long name prefixed"]]), ("x$!# long name prefixed", [["x$!#", "long name prefixed"]]),
]) ])
def test_i_can_post_fix_prefixed_concepts_with_long_names(self, expression, expected_sequences): def test_i_can_post_fix_prefixed_concepts_with_long_names(self, expression, expected_sequences):
concepts_map = { concepts_map = {
@@ -302,24 +289,18 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("expression, expected_sequences", [ @pytest.mark.parametrize("expression, expected_sequences", [
("suffixed one", [["one", "suffixed"]]), ("suffixed one", [["one", "suffixed"]]),
("suffixed 1 + 1", [[" 1 + 1", "suffixed"]]), ("suffixed 1 + 1", [["1 + 1", "suffixed"]]),
("suffixed one + two", [ ("suffixed one + two", [
["one", "suffixed", " + ", "two"], ["one", "suffixed", " + ", "two"],
[" one + two", "suffixed"], ["one + two", "suffixed"],
]), ]),
("suffixed twenty one", [ ("suffixed twenty one", [
[" twenty ", "suffixed", "one"], ["twenty ", "suffixed", "one"],
[cnode("twenties", 2, 4, "twenty one"), "suffixed"], [cnode("twenties", 2, 4, "twenty one"), "suffixed"],
]), ]),
("suffixed x$!#", [[" x$!#", "suffixed"]]), ("suffixed x$!#", [["x$!#", "suffixed"]]),
]) ])
def test_i_can_post_fix_simple_suffixed_concepts(self, expression, expected_sequences): def test_i_can_post_fix_simple_suffixed_concepts(self, expression, expected_sequences):
# concepts_map = {
# "suffixed": Concept("suffixed a").def_var("a"),
# "one": Concept("one"),
# "two": Concept("two"),
# "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
# }
sheerka, context, parser = self.init_parser() sheerka, context, parser = self.init_parser()
res = parser.infix_to_postfix(context, expression) res = parser.infix_to_postfix(context, expression)
@@ -351,26 +332,27 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("expression, expected_sequences", [ @pytest.mark.parametrize("expression, expected_sequences", [
("one ? two : three", [["one", "two", "three", "?"]]), ("one ? two : three", [["one", "two", "three", "?"]]),
("one ? baz qux : two", [["one", "baz qux", "two", "?"]]),
("1+1 ? one + two : twenty one", [ ("1+1 ? one + two : twenty one", [
["1+1 ", "one", " + ", "two"], # an error is detected ["1+1", "one", " + ", "two"], # error is detected so the parsing has stopped
["1+1 ", " one + two ", " twenty ", "?", ("one", 1)], ["1+1", "one + two", "twenty ", "?", ("one", 1)],
["1+1 ", " one + two ", short_cnode("twenties", "twenty one"), "?"], ["1+1", "one + two", short_cnode("twenties", "twenty one"), "?"],
]), ]),
("x$!# ? y$!# : z$!#", [["x$!# ", " y$!# ", " z$!#", "?"]]), ("x$!# ? y$!# : z$!#", [["x$!#", "y$!#", "z$!#", "?"]]),
("if one then two else three end", [["one", "two", "three", "if"]]), ("if one then two else three end", [["one", "two", "three", "if"]]),
("if 1+1 then x$!# else twenty one end", [ ("if 1+1 then x$!# else twenty one end", [
[" 1+1 ", " x$!# ", " twenty ", "one"], # an error is detected ["1+1", "x$!#", "twenty ", "one"], # an error is detected
[" 1+1 ", " x$!# ", short_cnode("twenties", "twenty one"), "if"], ["1+1", "x$!#", short_cnode("twenties", "twenty one"), "if"],
]), ]),
("if x$!# then one + two else z$!# end", [ ("if x$!# then one + two else z$!# end", [
[" x$!# ", "one", " + ", "two"], # an error is detected ["x$!#", "one", " + ", "two"], # error is detected so the parsing has stopped
[" x$!# ", " one + two ", " z$!# ", "if"], ["x$!#", "one + two", "z$!#", "if"],
]), ]),
]) ])
def test_i_can_post_fix_ternary_concepts(self, expression, expected_sequences): def test_i_can_post_fix_ternary_concepts(self, expression, expected_sequences):
""" """
The purpose of this test is to validate concepts like The purpose of this test is to validate concepts
that have at least 3 parameters separated by tokens that have at least 3 parameters separated by tokens
Example : Example :
var_0 token var_1 token var_2 var_0 token var_1 token var_2
@@ -381,14 +363,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
:return: :return:
""" """
# concepts_map = {
# "?": Concept("a ? b : c").def_var("a").def_var("b").def_var("c"),
# "if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c"),
# "one": Concept("one"),
# "two": Concept("two"),
# "three": Concept("three"),
# "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
# }
sheerka, context, parser = self.init_parser() sheerka, context, parser = self.init_parser()
res = parser.infix_to_postfix(context, expression) res = parser.infix_to_postfix(context, expression)
@@ -402,15 +376,15 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("expression, expected_sequences", [ @pytest.mark.parametrize("expression, expected_sequences", [
("one ? ? two : : three", [["one", "two", "three", "? ?"]]), ("one ? ? two : : three", [["one", "two", "three", "? ?"]]),
("1+1 ? ? one + two : : twenty one", [ ("1+1 ? ? one + two : : twenty one", [
["1+1 ", "one", " + ", "two"], # error ["1+1", "one", " + ", "two"], # error
["1+1 ", " one + two ", " twenty ", "? ?", ("one", 1)], ["1+1", "one + two", "twenty ", "? ?", ("one", 1)],
["1+1 ", " one + two ", short_cnode("twenties", "twenty one"), "? ?"], ["1+1", "one + two", short_cnode("twenties", "twenty one"), "? ?"],
]), ]),
("if if one then then two else else three end end ", [["one", "two", "three", "if if"]]), ("if if one then then two else else three end end ", [["one", "two", "three", "if if"]]),
("if if 1+1 then then x$!# else else twenty one end end ", [ ("if if 1+1 then then x$!# else else twenty one end end ", [
[" 1+1 ", " x$!# ", " twenty ", "one"], # error ["1+1", "x$!#", "twenty ", "one"], # error
[" 1+1 ", " x$!# ", short_cnode("twenties", "twenty one"), "if if"]]), ["1+1", "x$!#", short_cnode("twenties", "twenty one"), "if if"]]),
]) ])
def test_i_can_post_fix_ternary_concept_with_long_names(self, expression, expected_sequences): def test_i_can_post_fix_ternary_concept_with_long_names(self, expression, expected_sequences):
concepts_map = { concepts_map = {
@@ -433,8 +407,8 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("expression, expected", [ @pytest.mark.parametrize("expression, expected", [
("foo bar baz", ["baz", "bar", "foo"]), ("foo bar baz", ["baz", "bar", "foo"]),
("foo bar x$!#", [" x$!#", "bar", "foo"]), ("foo bar x$!#", ["x$!#", "bar", "foo"]),
("foo bar 1 + 1", [" 1 + 1", "bar", "foo"]), ("foo bar 1 + 1", ["1 + 1", "bar", "foo"]),
]) ])
def test_i_can_post_fix_suffixed_unary_composition(self, expression, expected): def test_i_can_post_fix_suffixed_unary_composition(self, expression, expected):
concepts_map = { concepts_map = {
@@ -452,8 +426,8 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("expression, expected", [ @pytest.mark.parametrize("expression, expected", [
("baz bar foo", ["baz", "bar", "foo"]), ("baz bar foo", ["baz", "bar", "foo"]),
("x$!# bar foo", ["x$!# ", "bar", "foo"]), ("x$!# bar foo", ["x$!#", "bar", "foo"]),
("1 + 1 bar foo", ["1 + 1 ", "bar", "foo"]), ("1 + 1 bar foo", ["1 + 1", "bar", "foo"]),
]) ])
def test_i_can_post_fix_prefixed_unary_composition(self, expression, expected): def test_i_can_post_fix_prefixed_unary_composition(self, expression, expected):
concepts_map = { concepts_map = {
@@ -480,17 +454,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
("one mult (two plus three)", ["one", "two", "three", "plus", "mult"]), ("one mult (two plus three)", ["one", "two", "three", "plus", "mult"]),
]) ])
def test_i_can_post_fix_binary_with_precedence(self, expression, expected): def test_i_can_post_fix_binary_with_precedence(self, expression, expected):
# concepts_map = {
# "plus": Concept("a plus b").def_var("a").def_var("b"),
# "mult": Concept("a mult b").def_var("a").def_var("b"),
# "one": Concept("one"),
# "two": Concept("two"),
# "three": Concept("three"),
# }
# sya_def = {
# concepts_map["plus"]: (5, SyaAssociativity.Right),
# concepts_map["mult"]: (10, SyaAssociativity.Right), # precedence greater than plus
# }
sheerka, context, parser = self.init_parser() sheerka, context, parser = self.init_parser()
res = parser.infix_to_postfix(context, expression) res = parser.infix_to_postfix(context, expression)
@@ -566,7 +529,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
} }
sya_def = { sya_def = {
concepts_map["plus"]: (None, SyaAssociativity.Left), concepts_map["plus"]: (1, SyaAssociativity.Left),
} }
sheerka, context, parser = self.init_parser(concepts_map, sya_def) sheerka, context, parser = self.init_parser(concepts_map, sya_def)
@@ -580,14 +543,14 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
assert res[0].out == expected_array assert res[0].out == expected_array
@pytest.mark.parametrize("expression, expected", [ @pytest.mark.parametrize("expression, expected", [
("x$!# ? y$!# : z$!# ? two : three", ["x$!# ", " y$!# ", " z$!# ", "two", "three", ("?", 1), "?"]), ("x$!# ? y$!# : z$!# ? two : three", ["x$!#", "y$!#", "z$!#", "two", "three", ("?", 1), "?"]),
("x$!# ? y$!# : (z$!# ? two : three)", ["x$!# ", " y$!# ", "z$!# ", "two", "three", ("?", 1), "?"]), ("x$!# ? y$!# : (z$!# ? two : three)", ["x$!#", "y$!#", "z$!#", "two", "three", ("?", 1), "?"]),
("one ? x$!# ? y$!# : z$!# : three", ["one", " x$!# ", " y$!# ", " z$!# ", ("?", 1), "three", "?"]), ("one ? x$!# ? y$!# : z$!# : three", ["one", "x$!#", "y$!#", "z$!#", ("?", 1), "three", "?"]),
("one ? (x$!# ? y$!# : z$!#) : three", ["one", "x$!# ", " y$!# ", " z$!#", ("?", 1), "three", "?"]), ("one ? (x$!# ? y$!# : z$!#) : three", ["one", "x$!#", "y$!#", "z$!#", ("?", 1), "three", "?"]),
("one ? two : x$!# ? y$!# : z$!#", ["one", "two", " x$!# ", " y$!# ", " z$!#", ("?", 1), "?"]), ("one ? two : x$!# ? y$!# : z$!#", ["one", "two", "x$!#", "y$!#", "z$!#", ("?", 1), "?"]),
("one ? two : (x$!# ? y$!# : z$!#)", ["one", "two", "x$!# ", " y$!# ", " z$!#", ("?", 1), "?"]), ("one ? two : (x$!# ? y$!# : z$!#)", ["one", "two", "x$!#", "y$!#", "z$!#", ("?", 1), "?"]),
]) ])
def test_i_can_post_fix_right_associated_ternary(self, expression, expected): def test_i_can_post_fix_right_associated_ternary(self, expression, expected):
concepts_map = { concepts_map = {
@@ -607,14 +570,14 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
assert res[0].out == expected_array assert res[0].out == expected_array
@pytest.mark.parametrize("expression, expected", [ @pytest.mark.parametrize("expression, expected", [
("x$!# ? y$!# : z$!# ? two : three", ["x$!# ", " y$!# ", " z$!# ", "?", "two", "three", ("?", 1)]), ("x$!# ? y$!# : z$!# ? two : three", ["x$!#", "y$!#", "z$!#", "?", "two", "three", ("?", 1)]),
("(x$!# ? y$!# : z$!#) ? two : three", ["x$!# ", " y$!# ", " z$!#", "?", "two", "three", ("?", 1)]), ("(x$!# ? y$!# : z$!#) ? two : three", ["x$!#", "y$!#", "z$!#", "?", "two", "three", ("?", 1)]),
# the following one is not possible when Left association # the following one is not possible when Left association
# ("one ? x$!# ? y$!# : z$!# : three", ["one", " x$!# ", " y$!# ", " z$!# ", ("?", 1), "three", "?"]), # ("one ? x$!# ? y$!# : z$!# : three", ["one", "x$!#", "y$!#", "z$!#", ("?", 1), "three", "?"]),
("one ? two : x$!# ? y$!# : z$!#", ["one", "two", " x$!# ", "?", " y$!# ", " z$!#", ("?", 1)]), ("one ? two : x$!# ? y$!# : z$!#", ["one", "two", "x$!#", "?", "y$!#", "z$!#", ("?", 1)]),
("(one ? two : x$!#) ? y$!# : z$!#", ["one", "two", " x$!#", "?", " y$!# ", " z$!#", ("?", 1)]), ("(one ? two : x$!#) ? y$!# : z$!#", ["one", "two", "x$!#", "?", "y$!#", "z$!#", ("?", 1)]),
]) ])
def test_i_can_post_fix_left_associated_ternary(self, expression, expected): def test_i_can_post_fix_left_associated_ternary(self, expression, expected):
concepts_map = { concepts_map = {
@@ -644,7 +607,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
expression = "foo bar baz" expression = "foo bar baz"
res = parser.infix_to_postfix(context, expression) res = parser.infix_to_postfix(context, expression)
expected_sequences = [ expected_sequences = [
[UTN(" bar "), "foo", "baz"], [UTN("bar "), "foo", "baz"],
["baz", "foo bar"] ["baz", "foo bar"]
] ]
@@ -669,9 +632,9 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
("square( one plus three ) minus two", ["one", "three", "plus", "square", "two", "minus"]), ("square( one plus three ) minus two", ["one", "three", "plus", "square", "two", "minus"]),
("one minus square( two plus three ) ", ["one", "two", "three", "plus", "square", "minus"]), ("one minus square( two plus three ) ", ["one", "two", "three", "plus", "square", "minus"]),
("(one prefixed) prefixed)", ["one", "prefixed", ("prefixed", 1)]), ("((one prefixed) prefixed)", ["one", "prefixed", ("prefixed", 1)]),
("( one prefixed ) prefixed)", ["one", "prefixed", ("prefixed", 1)]), ("( ( one prefixed ) prefixed)", ["one", "prefixed", ("prefixed", 1)]),
("( square( one ) prefixed ) prefixed)", ["one", "square", "prefixed", ("prefixed", 1)]), ("( ( square( one ) prefixed ) prefixed)", ["one", "square", "prefixed", ("prefixed", 1)]),
("suffixed (suffixed one)", ["one", ("suffixed", 1), "suffixed"]), ("suffixed (suffixed one)", ["one", ("suffixed", 1), "suffixed"]),
("suffixed ( suffixed one) ", ["one", ("suffixed", 1), "suffixed"]), ("suffixed ( suffixed one) ", ["one", ("suffixed", 1), "suffixed"]),
@@ -681,32 +644,12 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
("one plus (two minus three)", ["one", "two", "three", "minus", "plus"]), ("one plus (two minus three)", ["one", "two", "three", "minus", "plus"]),
("one plus ( two minus three )", ["one", "two", "three", "minus", "plus"]), ("one plus ( two minus three )", ["one", "two", "three", "minus", "plus"]),
("(one plus two) minus three", ["one", "two", "plus", "three", "minus"]), ("(one plus two) minus three", ["one", "two", "plus", "three", "minus"]),
("( one plus two ) minus three )", ["one", "two", "plus", "three", "minus"]), ("(( one plus two ) minus three )", ["one", "two", "plus", "three", "minus"]),
("foo bar (one)", ["one", "foo bar"]), ("foo bar(one)", ["one", "foo bar"]),
("foo bar ( one )", ["one", "foo bar"]), ("foo bar ( one )", ["one", "foo bar"]),
]) ])
def test_i_can_pos_fix_when_parenthesis(self, expression, expected): def test_i_can_pos_fix_when_parenthesis(self, expression, expected):
# concepts_map = {
# "prefixed": Concept("a prefixed").def_var("a"),
# "suffixed": Concept("suffixed a").def_var("a"),
# "square": Concept("square(a)").def_var("a"),
# "foo bar": Concept("foo bar(a)").def_var("a"),
# "plus": Concept("a plus b").def_var("a").def_var("b"),
# "minus": Concept("a minus b").def_var("a").def_var("b"),
# "if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c"),
# "?": Concept("a ? b : c").def_var("a").def_var("b").def_var("c"),
# "one": Concept("one"),
# "two": Concept("two"),
# "three": Concept("three"),
# }
#
# sya_def = {
# concepts_map["square"]: (None, SyaAssociativity.No),
# concepts_map["plus"]: (10, SyaAssociativity.Right),
# concepts_map["minus"]: (10, SyaAssociativity.Right),
# }
sheerka, context, parser = self.init_parser() sheerka, context, parser = self.init_parser()
res = parser.infix_to_postfix(context, expression) res = parser.infix_to_postfix(context, expression)
@@ -721,14 +664,14 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
("function(one prefixed)", [[SCWC("function(", ")", "one", "prefixed")]]), ("function(one prefixed)", [[SCWC("function(", ")", "one", "prefixed")]]),
("function(if one then two else three end)", [[SCWC("function(", ")", "one", "two", "three", "if")]]), ("function(if one then two else three end)", [[SCWC("function(", ")", "one", "two", "three", "if")]]),
("function(suffixed twenty two)", [ ("function(suffixed twenty two)", [
[SCWC("function(", ")", " twenty ", "suffixed", "two")], [SCWC("function(", ")", "twenty ", "suffixed", "two")],
[SCWC("function(", ")", short_cnode("twenties", "twenty two"), "suffixed")]]), [SCWC("function(", ")", short_cnode("twenties", "twenty two"), "suffixed")]]),
("function(twenty two prefixed)", [ ("function(twenty two prefixed)", [
[SCWC("function(", ")", "twenty ", "two", "prefixed")], [SCWC("function(", ")", "twenty ", "two", "prefixed")],
[SCWC("function(", ")", short_cnode("twenties", "twenty two"), "prefixed")], [SCWC("function(", ")", short_cnode("twenties", "twenty two"), "prefixed")],
]), ]),
("function(if one then twenty two else three end)", [ ("function(if one then twenty two else three end)", [
["')'", "one", " twenty ", "two"], # error ["')'", "one", "twenty ", "two"], # error
[SCWC("function(", ")", "one", short_cnode("twenties", "twenty two"), "three", "if")] [SCWC("function(", ")", "one", short_cnode("twenties", "twenty two"), "three", "if")]
]), ]),
("func1(func2(one two) three)", [ ("func1(func2(one two) three)", [
@@ -744,16 +687,16 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
]), ]),
("f1(one plus two mult three) plus f2(suffixed x$!# prefixed)", [ ("f1(one plus two mult three) plus f2(suffixed x$!# prefixed)", [
[SCWC("f1(", ")", "one", "two", "three", "mult", "plus"), [SCWC("f1(", ")", "one", "two", "three", "mult", "plus"),
SCWC(" f2(", (")", 1), " x$!# ", "prefixed", "suffixed"), SCWC("f2(", (")", 1), "x$!#", "prefixed", "suffixed"),
("plus", 1)] ("plus", 1)]
]), ]),
# plus, suffixed, prefixed, ternary # plus, suffixed, prefixed, ternary
("func1(one) plus func2(two)", [[SCWC("func1(", ")", "one"), SCWC(" func2(", (")", 1), "two"), "plus"]]), ("func1(one) plus func2(two)", [[SCWC("func1(", ")", "one"), SCWC("func2(", (")", 1), "two"), "plus"]]),
("suffixed function(one)", [[SCWC(" function(", ")", "one"), "suffixed"]]), ("suffixed function(one)", [[SCWC("function(", ")", "one"), "suffixed"]]),
("function(one) prefixed", [[SCWC("function(", ")", "one"), "prefixed"]]), ("function(one) prefixed", [[SCWC("function(", ")", "one"), "prefixed"]]),
("if f1(one) then f2(two) else f3(three) end", [ ("if f1(one) then f2(two) else f3(three) end", [
[SCWC(" f1(", ")", "one"), SCWC(" f2(", (")", 1), "two"), SCWC(" f3(", (")", 2), "three"), "if"]]), [SCWC("f1(", ")", "one"), SCWC("f2(", (")", 1), "two"), SCWC("f3(", (")", 2), "three"), "if"]]),
# Sequence # Sequence
("if one then two else three end function(x$!#)", [ ("if one then two else three end function(x$!#)", [
@@ -762,21 +705,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
("suffixed one function(two)", [["one", "suffixed", SCWC(" function(", ")", "two")]]), ("suffixed one function(two)", [["one", "suffixed", SCWC(" function(", ")", "two")]]),
]) ])
def test_i_can_post_fix_when_parenthesis_and_unknown(self, expression, expected_sequences): def test_i_can_post_fix_when_parenthesis_and_unknown(self, expression, expected_sequences):
# concepts_map = {
# "prefixed": Concept("a prefixed").def_var("a"),
# "suffixed": Concept("suffixed a").def_var("a"),
# "plus": Concept("a plus b").def_var("a").def_var("b"),
# "mult": Concept("a mult b").def_var("a").def_var("b"),
# "if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c"),
# "one": Concept("one"),
# "two": Concept("two"),
# "three": Concept("three"),
# "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
# }
# sya_def = {
# concepts_map["plus"]: (5, SyaAssociativity.Right),
# concepts_map["mult"]: (10, SyaAssociativity.Right), # precedence greater than plus
# }
sheerka, context, parser = self.init_parser() sheerka, context, parser = self.init_parser()
res = parser.infix_to_postfix(context, expression) res = parser.infix_to_postfix(context, expression)
@@ -787,28 +715,22 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
assert res_i.out == expected_array assert res_i.out == expected_array
@pytest.mark.parametrize("expression, expected", [ @pytest.mark.parametrize("expression, expected", [
("(", ("(", 0)), # ("(", ("(", 0)),
("one plus ( 1 + ", ("(", 4)), # ("one plus ( 1 + ", ("(", 4)),
("one( 1 + ", ("(", 1)), # ("one( 1 + ", ("(", 1)),
("one ( 1 + ", ("(", 2)), # ("one ( 1 + ", ("(", 2)),
("function( 1 + ", ("(", 1)), # ("function( 1 + ", ("(", 1)),
("function ( 1 + ", ("(", 2)), # ("function ( 1 + ", ("(", 2)),
("one plus ) 1 + ", (")", 4)), # ("one plus ) 1 + ", (")", 4)),
("one ) 1 + ", (")", 2)), # ("one ) 1 + ", (")", 2)),
("function ) 1 + ", (")", 2)), # ("function ) 1 + ", (")", 2)),
("one ? ( : two", ("(", 4)), # ("one ? ( : two", ("(", 4)),
("one ? one plus ( : two", ("(", 8)), # ("one ? one plus ( : two", ("(", 8)),
("one ? ) : two", (")", 4)), # ("one ? ) : two", (")", 4)),
("one ? one plus ) : two", (")", 8)), # ("one ? one plus ) : two", (")", 8)),
("(one plus ( 1 + )", ("(", 0)), ("(one plus ( 1 + )", ("(", 0)),
]) ])
def test_i_can_detect_parenthesis_mismatch_error_when_post_fixing(self, expression, expected): def test_i_can_detect_parenthesis_mismatch_error_when_post_fixing(self, expression, expected):
# concepts_map = {
# "one": Concept("one"),
# "two": Concept("two"),
# "plus": Concept("a plus b").def_var("a").def_var("b"),
# "?": Concept("a ? b : c").def_var("a").def_var("b").def_var("c"),
# }
sheerka, context, parser = self.init_parser() sheerka, context, parser = self.init_parser()
res = parser.infix_to_postfix(context, expression) res = parser.infix_to_postfix(context, expression)
@@ -820,12 +742,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
("one ? one two : three", ("?", ":")), ("one ? one two : three", ("?", ":")),
]) ])
def test_i_can_detected_when_too_many_parameters(self, expression, expected): def test_i_can_detected_when_too_many_parameters(self, expression, expected):
# concepts_map = {
# "one": Concept("one"),
# "two": Concept("two"),
# "plus": Concept("a plus b").def_var("a").def_var("b"),
# "?": Concept("a ? b : c").def_var("a").def_var("b").def_var("c"),
# }
sheerka, context, parser = self.init_parser(cmap, None) sheerka, context, parser = self.init_parser(cmap, None)
res = parser.infix_to_postfix(context, expression) res = parser.infix_to_postfix(context, expression)
@@ -850,27 +766,16 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
("one infix two three infix four", ["one", "two", "infix", "three", "four", ("infix", 1)]), ("one infix two three infix four", ["one", "two", "infix", "three", "four", ("infix", 1)]),
("one infix two three prefixed", ["one", "two", "infix", "three", "prefixed"]), ("one infix two three prefixed", ["one", "two", "infix", "three", "prefixed"]),
("one infix two suffixed three", ["one", "two", "infix", "three", "suffixed"]), ("one infix two suffixed three", ["one", "two", "infix", "three", "suffixed"]),
("one infix two x$!# ? y$!# : z$!#", ["one", "two", "infix", " x$!# ", " y$!# ", " z$!#", "?"]), ("one infix two x$!# ? y$!# : z$!#", ["one", "two", "infix", " x$!#", "y$!#", "z$!#", "?"]),
("one prefixed two infix three", ["one", "prefixed", "two", "three", "infix"]), ("one prefixed two infix three", ["one", "prefixed", "two", "three", "infix"]),
("one prefixed two prefixed", ["one", "prefixed", "two", ("prefixed", 1)]), ("one prefixed two prefixed", ["one", "prefixed", "two", ("prefixed", 1)]),
("one prefixed suffixed two", ["one", "prefixed", "two", "suffixed"]), ("one prefixed suffixed two", ["one", "prefixed", "two", "suffixed"]),
("one prefixed x$!# ? y$!# : z$!#", ["one", "prefixed", " x$!# ", " y$!# ", " z$!#", "?"]), ("one prefixed x$!# ? y$!# : z$!#", ["one", "prefixed", " x$!#", "y$!#", "z$!#", "?"]),
("(one infix two) (three prefixed)", ["one", "two", "infix", "three", "prefixed"]), ("(one infix two) (three prefixed)", ["one", "two", "infix", "three", "prefixed"]),
]) ])
def test_i_can_post_fix_sequences(self, expression, expected): def test_i_can_post_fix_sequences(self, expression, expected):
# concepts_map = {
# "prefixed": Concept("a prefixed").def_var("a"),
# "suffixed": Concept("suffixed a").def_var("a"),
# "infix": Concept("a infix b").def_var("a").def_var("b"),
# "?": Concept("a ? b : c").def_var("a").def_var("b").def_var("c"),
# "one": Concept("one"),
# "two": Concept("two"),
# "three": Concept("three"),
# "four": Concept("four"),
# }
sheerka, context, parser = self.init_parser(cmap, None) sheerka, context, parser = self.init_parser(cmap, None)
res = parser.infix_to_postfix(context, expression) res = parser.infix_to_postfix(context, expression)
@@ -886,23 +791,49 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
"plus equals": Concept("a plus equals b").def_var("a").def_var("b"), "plus equals": Concept("a plus equals b").def_var("a").def_var("b"),
} }
sheerka, context, parser = self.init_parser(concepts_map, None) sya_def = {
concepts_map["plus"]: (1, SyaAssociativity.Right),
concepts_map["plus plus"]: (1, SyaAssociativity.Right),
concepts_map["plus equals"]: (1, SyaAssociativity.Right),
}
sheerka, context, parser = self.init_parser(concepts_map, sya_def)
expression = "a plus plus equals b" expression = "a plus plus equals b"
res = parser.infix_to_postfix(context, expression) res = parser.infix_to_postfix(context, expression)
expected_array = tests.parsers.parsers_utils.compute_debug_array(res) expected_array = tests.parsers.parsers_utils.compute_debug_array(res)
assert expected_array == [ assert expected_array == [
["a", "a plus b", "a plus b", "equals", "b"], ["T(a)", "C(a plus b)", "C(a plus b)", "T(equals)", "T(b)"],
["a", "a plus b", "a plus plus", "equals", "b"], ["T(a)", "C(a plus b)", "C(a plus plus)", "T(equals)", "T(b)"],
["a", "a plus b", "a plus equals b", "equals", "b"], ["T(a)", "C(a plus b)", "C(a plus equals b)", "T(equals)", "T(b)"],
["a", "a plus plus", "plus", "equals", "b"], ["T(a)", "C(a plus plus)", "T(plus)", "T(equals)", "T(b)"],
["a", "a plus plus", "plus", "equals", "b"], ["T(a)", "C(a plus plus)", "T(plus)", "T(equals)", "T(b)"],
["a", "a plus plus", "plus", "equals", "b"], ["T(a)", "C(a plus plus)", "T(plus)", "T(equals)", "T(b)"],
["a", "a plus equals b", "a plus b", "equals", "b"], ["T(a)", "C(a plus equals b)", "C(a plus b)", "T(equals)", "T(b)"],
["a", "a plus equals b", "a plus plus", "equals", "b"], ["T(a)", "C(a plus equals b)", "C(a plus plus)", "T(equals)", "T(b)"],
["a", "a plus equals b", "a plus equals b", "equals", "b"], ["T(a)", "C(a plus equals b)", "C(a plus equals b)", "T(equals)", "T(b)"],
] ]
def test_non_reg(self):
concepts_map = {
"plus": Concept("a plus b").def_var("a").def_var("b"),
"complex infix": Concept("a complex infix b ").def_var("a").def_var("b"),
}
sya_def = {
# concepts_map["plus"]: (1, SyaAssociativity.Right),
# concepts_map["plus plus"]: (1, SyaAssociativity.Right),
# concepts_map["plus equals"]: (1, SyaAssociativity.Right),
}
sheerka, context, parser = self.init_parser(concepts_map, sya_def)
expression = "a plus complex infix b"
res = parser.infix_to_postfix(context, expression)
res = parser.parse(context, expression)
pass
def test_i_can_use_string_instead_of_identifier(self): def test_i_can_use_string_instead_of_identifier(self):
concepts_map = { concepts_map = {
"ternary": Concept("a ? ? b '::' c").def_var("a").def_var("b").def_var("c"), "ternary": Concept("a ? ? b '::' c").def_var("a").def_var("b").def_var("c"),
@@ -945,13 +876,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
Not quite sure why this test is here Not quite sure why this test is here
:return: :return:
""" """
# concepts_map = { sheerka, context, parser = self.init_parser()
# "foo": Concept("foo a").def_var("a"),
# "one": Concept("one"),
# "two": Concept("two"),
# "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
# }
sheerka, context, parser = self.init_parser(cmap, None)
expression = "suffixed twenties" expression = "suffixed twenties"
res = parser.infix_to_postfix(context, expression) res = parser.infix_to_postfix(context, expression)
@@ -962,17 +887,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
assert res[0].out == expected_array assert res[0].out == expected_array
def test_i_can_parse_when_concept_atom_only(self): def test_i_can_parse_when_concept_atom_only(self):
# concepts_map = {
# "plus": Concept("a plus b").def_var("a").def_var("b"),
# "mult": Concept("a mult b").def_var("a").def_var("b"),
# "one": Concept("one"),
# "two": Concept("two"),
# "three": Concept("three"),
# }
# sya_def = {
# concepts_map["plus"]: (5, SyaAssociativity.Right),
# concepts_map["mult"]: (10, SyaAssociativity.Right), # precedence greater than plus
# }
sheerka, context, parser = self.init_parser() sheerka, context, parser = self.init_parser()
text = "one plus two mult three" text = "one plus two mult three"
@@ -992,10 +906,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
assert expected_concept.compiled["b"].compiled["b"] == cmap["three"] assert expected_concept.compiled["b"].compiled["b"] == cmap["three"]
def test_i_can_parse_when_python_code(self): def test_i_can_parse_when_python_code(self):
# concepts_map = { sheerka, context, parser = self.init_parser()
# "foo": Concept("foo a").def_var("a")
# }
sheerka, context, parser = self.init_parser(cmap, None)
text = "suffixed 1 + 1" text = "suffixed 1 + 1"
res = parser.parse(context, text) res = parser.parse(context, text)
@@ -1014,16 +925,10 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
assert sheerka.isinstance(return_value_a, BuiltinConcepts.RETURN_VALUE) assert sheerka.isinstance(return_value_a, BuiltinConcepts.RETURN_VALUE)
assert return_value_a.status assert return_value_a.status
assert sheerka.isinstance(return_value_a.body, BuiltinConcepts.PARSER_RESULT) assert sheerka.isinstance(return_value_a.body, BuiltinConcepts.PARSER_RESULT)
assert return_value_a.body.source == " 1 + 1" assert return_value_a.body.source == "1 + 1"
assert isinstance(return_value_a.body.body, PythonNode) assert isinstance(return_value_a.body.body, PythonNode)
def test_i_can_parse_when_bnf_concept(self): def test_i_can_parse_when_bnf_concept(self):
# concepts_map = {
# "foo": Concept("foo a").def_var("a"),
# "one": Concept("one"),
# "two": Concept("two"),
# "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
# }
sheerka, context, parser = self.init_parser() sheerka, context, parser = self.init_parser()
text = "suffixed twenty one" text = "suffixed twenty one"
@@ -1043,13 +948,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
assert expected_concept.compiled["a"].compiled["unit"] == cmap["one"] assert expected_concept.compiled["a"].compiled["unit"] == cmap["one"]
def test_i_can_parse_sequences(self): def test_i_can_parse_sequences(self):
# concepts_map = { sheerka, context, parser = self.init_parser()
# "plus": Concept("a plus b").def_var("a").def_var("b"),
# "foo": Concept("foo a").def_var("a"),
# "one": Concept("one"),
# "two": Concept("two"),
# }
sheerka, context, parser = self.init_parser(cmap, None)
text = "one plus 1 + 1 suffixed two" text = "one plus 1 + 1 suffixed two"
res = parser.parse(context, text) res = parser.parse(context, text)
@@ -1081,27 +980,12 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
("f1(one prefixed) plus f2(suffixed two)", True, [ ("f1(one prefixed) plus f2(suffixed two)", True, [
CNC("plus", CNC("plus",
a=SCWC("f1(", ")", CNC("prefixed", a="one")), a=SCWC("f1(", ")", CNC("prefixed", a="one")),
b=SCWC(" f2(", (")", 1), CNC("suffixed", a="two"))) b=SCWC("f2(", (")", 1), CNC("suffixed", a="two")))
]), ]),
("function(suffixed x$!#)", False, [ ("function(suffixed x$!#)", False, [
SCWC("function(", ")", CNC("suffixed", 2, 7, a=" x$!#"))]), SCWC("function(", ")", CNC("suffixed", 2, 7, a="x$!#"))]),
]) ])
def test_i_can_parse_when_one_result(self, text, expected_status, expected_result): def test_i_can_parse_when_one_result(self, text, expected_status, expected_result):
# concepts_map = {
# "prefixed": Concept("a prefixed").def_var("a"),
# "suffixed": Concept("suffixed a").def_var("a"),
# "mult": Concept("a mult b").def_var("a").def_var("b"),
# "plus": Concept("a plus b").def_var("a").def_var("b"),
# "if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c"),
# "one": Concept("one"),
# "two": Concept("two"),
# "three": Concept("three"),
# "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
# }
# sya_def = {
# concepts_map["plus"]: (5, SyaAssociativity.Right),
# concepts_map["mult"]: (10, SyaAssociativity.Right), # precedence greater than plus
# }
sheerka, context, parser = self.init_parser() sheerka, context, parser = self.init_parser()
res = parser.parse(context, text) res = parser.parse(context, text)
@@ -1113,41 +997,54 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert lexer_nodes == expected_array assert lexer_nodes == expected_array
# @pytest.mark.parametrize("text, list_of_expected", [ @pytest.mark.parametrize("text", [
# ("1 plus twenty one", [ "foo bar (one",
# (False, [CNC("plus", a=scnode(0, 0, "1"), b=UTN(" twenty ")), CN("one")]), "foo bar one",
# (True, [CNC("plus", a=scnode(0, 0, "1"), b=CN("twenties", source="twenty one"))]) "foo one two",
# ]) "foo x$!# one",
# ]) ])
# def test_i_can_parse_when_multiple_results(self, text, list_of_expected): def test_i_cannot_parse_when_concept_almost_found(self, text):
# concepts_map = { """
# "prefixed": Concept("a prefixed").def_var("a"), We test that the parsed concept seems like a known one, but it was not.
# "suffixed": Concept("suffixed a").def_var("a"), The parser has to detected that the predication was incorrect
# "mult": Concept("a mult b").def_var("a").def_var("b"), :return:
# "plus": Concept("a plus b").def_var("a").def_var("b"), """
# "if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c"), sheerka, context, parser = self.init_parser()
# "one": Concept("one"),
# "two": Concept("two"), res = parser.parse(context, text)
# "three": Concept("three"),
# "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), assert not res.status
# } assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
# sya_def = { assert res.body.body == text
# concepts_map["plus"]: (5, SyaAssociativity.Right),
# concepts_map["mult"]: (10, SyaAssociativity.Right), # precedence greater than plus @pytest.mark.parametrize("text, expected_result", [
# } ("one plus two foo bar baz", [CNC("plus", a="one", b="two"), UTN(" foo bar baz")]),
# sheerka, context, parser = self.init_parser(concepts_map, sya_def) ("one plus two foo bar", [CNC("plus", a="one", b="two"), UTN(" foo bar")]),
# ("foo bar one plus two", [UTN("foo bar "), CNC("plus", a="one", b="two")]),
# list_of_res = parser.parse(context, text) ("foo bar (one plus two", [UTN("foo bar ("), CNC("plus", a="one", b="two")]),
# assert len(list_of_res) == len(list_of_expected) ("one plus two a long other b", [CNC("plus", a="one", b="two"), UTN(" a long other b")]),
# ("one plus two a long infixed", [CNC("plus", a="one", b="two"), UTN(" a long infixed")]),
# for res, expected in zip(list_of_res, list_of_expected): ("one plus two a long", [CNC("plus", a="one", b="two"), UTN(" a long")]),
# wrapper = res.body ("one ? a long infixed : two", [CNC("?", a="one", b=UTN("a long infixed"), c="two")]),
# lexer_nodes = res.body.body ("one ? a long infix : two", [CNC("?", a="one", b=UTN("a long infix"), c="two")]),
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) ])
# def test_i_cannot_parse_when_one_part_is_recognized_but_not_the_rest(self, text, expected_result):
# expected_array = compute_expected_array(concepts_map, text, expected[1]) """
# assert res.status == expected[0] We test that the parsed concept seems like a known one, but it was not.
# assert lexer_nodes == expected_array The parser has to detected that the predication was incorrect
:return:
"""
sheerka, context, parser = self.init_parser()
res = parser.parse(context, text)
wrapper = res.body
lexer_nodes = res.body.body
expected_array = compute_expected_array(cmap, text, expected_result)
assert not res.status
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert lexer_nodes == expected_array
@pytest.mark.parametrize("text, expected_concept, expected_unrecognized", [ @pytest.mark.parametrize("text, expected_concept, expected_unrecognized", [
("x$!# prefixed", "prefixed", ["a"]), ("x$!# prefixed", "prefixed", ["a"]),
@@ -1157,12 +1054,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
("x$!# infix z$!#", "infix", ["a", "b"]), ("x$!# infix z$!#", "infix", ["a", "b"]),
]) ])
def test_i_cannot_parse_when_unrecognized(self, text, expected_concept, expected_unrecognized): def test_i_cannot_parse_when_unrecognized(self, text, expected_concept, expected_unrecognized):
# concepts_map = {
# "suffixed": Concept("suffixed a").def_var("a"),
# "prefixed": Concept("a prefixed").def_var("a"),
# "infix": Concept("a infix b").def_var("a").def_var("b"),
# "one": Concept("one")
# }
sheerka, context, parser = self.init_parser() sheerka, context, parser = self.init_parser()
res = parser.parse(context, text) res = parser.parse(context, text)
@@ -1183,13 +1074,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
("one prefixed x$!#", [cnode("__var__0 prefixed", 0, 2, "one prefixed"), utnode(3, 7, " x$!#")]), ("one prefixed x$!#", [cnode("__var__0 prefixed", 0, 2, "one prefixed"), utnode(3, 7, " x$!#")]),
]) ])
def test_i_cannot_parse_when_part_of_the_sequence_is_not_recognized(self, text, expected): def test_i_cannot_parse_when_part_of_the_sequence_is_not_recognized(self, text, expected):
# concepts_map = {
# "suffixed": Concept("suffixed a").def_var("a"),
# "prefixed": Concept("a prefixed").def_var("a"),
# "infix": Concept("a infix b").def_var("a").def_var("b"),
# "one": Concept("one"),
# "two": Concept("two"),
# }
sheerka, context, parser = self.init_parser() sheerka, context, parser = self.init_parser()
res = parser.parse(context, text) res = parser.parse(context, text)
@@ -1203,7 +1087,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("text", [ @pytest.mark.parametrize("text", [
"one", "one",
"1 + 1", "1 + 1",
"x$!# ", "x$!#",
"twenty one" "twenty one"
"", "",
"function(not an sya concept)", "function(not an sya concept)",
@@ -1214,13 +1098,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
Atoms concepts, source code or BNF concepts alone are discarded by the lexer Atoms concepts, source code or BNF concepts alone are discarded by the lexer
:return: :return:
""" """
# concepts_map = {
# "plus": Concept("a plus b").def_var("a").def_var("b"),
# "one": Concept("one"),
# "two": Concept("two"),
# "three": Concept("three"),
# "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
# }
sheerka, context, parser = self.init_parser() sheerka, context, parser = self.init_parser()
res = parser.parse(context, text) res = parser.parse(context, text)
File diff suppressed because it is too large Load Diff