Fixed SyaNodeParser false positive recognition issue

This commit is contained in:
2020-05-15 10:36:05 +02:00
parent 6e343ba996
commit 5489ef00b9
24 changed files with 484 additions and 5741 deletions
-18
View File
@@ -349,9 +349,6 @@ class EnumerationConcept(Concept):
self.set_value(ConceptParts.BODY, iteration)
self.metadata.is_evaluated = True
# def __iter__(self):
# return iter(self.body)
class ListConcept(Concept):
def __init__(self, items=None):
@@ -362,21 +359,6 @@ class ListConcept(Concept):
def append(self, obj):
self.body.append(obj)
# def __len__(self):
# return len(self.body)
#
# def __getitem__(self, key):
# return self.body[key]
#
# def __setitem__(self, key, value):
# self.body[key] = value
#
# def __iter__(self):
# return iter(self.body)
#
# def __contains__(self, item):
# return item in self.body
class FilteredConcept(Concept):
def __init__(self, filtered=None, iterable=None, predicate=None):
+1
View File
@@ -326,6 +326,7 @@ def ensure_evaluated(context, concept):
return evaluated
def get_lexer_nodes_from_unrecognized(context, unrecognized_tokens_node, parsers):
"""
Using parsers, try to recognize concepts from source
+3 -3
View File
@@ -221,7 +221,7 @@ class Concept:
Create the key for this concept.
Must be called only when the concept is fully initialized
The method is not called set_key to make sure that no other class set the key by mistake
The method is not called 'set_key' to make sure that no other class set the key by mistake
:param tokens:
:return:
"""
@@ -248,8 +248,8 @@ class Concept:
if token.value in variables:
key += VARIABLE_PREFIX + str(variables.index(token.value))
else:
value = token.value[1:-1] if token.type == TokenKind.STRING else token.value
key += value
#value = token.value[1:-1] if token.type == TokenKind.STRING else token.value
key += token.value
first = False
self.metadata.key = key
@@ -56,12 +56,6 @@ class SheerkaCreateNewConcept:
return sheerka.ret(self.logger_name, False, ErrorConcept(init_ret_value.value))
resolved_concepts_by_first_keyword = init_ret_value.body
# update concept definition by key
# init_sya_ret_value = self.bnp.initialize(context, [concept], use_sheerka=True)
# if not init_sya_ret_value.status:
# return sheerka.ret(self.logger_name, False, ErrorConcept(init_sya_ret_value.value))
# concepts_by_first_keyword = init_sya_ret_value.body
concept.freeze_definition_hash()
cache_manager.add_concept(concept)
@@ -74,21 +68,3 @@ class SheerkaCreateNewConcept:
# process the return if needed
ret = sheerka.ret(self.logger_name, True, sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=concept))
return ret
# def load_concepts_nodes_definitions(self, context):
# """
# Gets from sdp what is need to parse nodes
# :return:
# """
# sdp = self.sheerka.sdp
#
# concepts_by_first_keyword = sdp.get(
# self.sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY,
# load_origin=False) or {}
#
# init_ret_value = self.bnp.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
# if not init_ret_value.status:
# return self.sheerka.ret(self.logger_name, False, ErrorConcept(init_ret_value.value))
# resolved_concepts_by_first_keyword = init_ret_value.body
#
# return concepts_by_first_keyword, resolved_concepts_by_first_keyword
+4 -3
View File
@@ -1,8 +1,9 @@
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
import core.utils
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
NO_MATCH = "** No Match **"
class SheerkaExecute:
"""
Manage the execution of a process flow
@@ -58,7 +59,8 @@ class SheerkaExecute:
# else "'" + BaseParser.get_text_from_tokens(to_parse) + "' as tokens"
# execution_context.log(f"Parsing {debug_text}")
with execution_context.push(desc=f"Parsing using {parser.name}", logger=parser.verbose_log) as sub_context:
with execution_context.push(desc=f"Parsing using {parser.name}",
logger=parser.verbose_log) as sub_context:
sub_context.add_inputs(to_parse=to_parse)
res = parser.parse(sub_context, to_parse)
if res is not None:
@@ -86,7 +88,6 @@ class SheerkaExecute:
stop_processing = True
sub_context.add_values(return_values=res)
if stop_processing:
break # Do not try the other priorities if a match is found
@@ -35,7 +35,7 @@ class SheerkaModifyConcept:
# TODO : update concept by first keyword
# TODO : update resolved by first keyword
# TODO : update concets grammars
# TODO : update concepts grammars
ret = self.sheerka.ret(self.logger_name, True, self.sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=concept))
return ret
@@ -1,8 +1,6 @@
from dataclasses import dataclass
from typing import List
from sdp.sheerkaSerializer import Serializer
@dataclass
class Variable:
-48
View File
@@ -60,10 +60,6 @@ class Sheerka(Concept):
self.bnp = None # reference to the BaseNodeParser class (to compute first keyword token)
# # Cache for concepts grammars
# # To be shared between BNFNode parsers instances
# self.concepts_grammars = {}
# a concept can be instantiated
# ex: File is a concept, but File('foo.txt') is an instance
# TODO: manage contexts
@@ -303,27 +299,6 @@ class Sheerka(Concept):
res = self.bnp.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
self.cache_manager.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, res.body)
# sya = self.bnf.resolve_sya_associativity_and_precedence()
# self.cache_manager.put(self.RESOLVED_CONCEPTS_SYA_DEFINITION_ENTRY, sya)
#
#
# self.concepts_by_first_keyword, \
# self.resolved_concepts_by_first_keyword = \
# self.create_new_concept_handler.load_concepts_nodes_definitions(context)
# self.concepts_by_first_keyword = self.sdp.get_safe(
# self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY,
# load_origin=False) or {}
#
# self.sya_definitions = self.sdp.get_safe(
# self.CONCEPTS_SYA_DEFINITION_ENTRY,
# load_origin=False) or {}
#
# init_ret_value = self.bnp.resolve_concepts_by_first_keyword(self, self.concepts_by_first_keyword)
# if not init_ret_value.status:
# return self.sheerka.ret(self.logger_name, False, ErrorConcept(init_ret_value.value))
# self.resolved_concepts_by_first_keyword = init_ret_value.body
def reset(self, cache_only=False):
self.cache_manager.clear()
self.cache_manager.cache_only = cache_only
@@ -346,7 +321,6 @@ class Sheerka(Concept):
with ExecutionContext(self.key, event, self, f"Evaluating '{text}'", self.log) as execution_context:
user_input = self.ret(self.name, True, self.new(BuiltinConcepts.USER_INPUT, body=text, user_name=user_name))
reduce_requested = self.ret(self.name, True, self.new(BuiltinConcepts.REDUCE_REQUESTED))
# execution_context.local_hints.add(BuiltinConcepts.EVAL_WHERE_REQUESTED)
steps = [
BuiltinConcepts.BEFORE_PARSING,
@@ -525,28 +499,6 @@ class Sheerka(Concept):
return concept
#
# def get(self, concept_key, concept_id=None):
# """
# Tries to find a concept
# What is return must be used a template for another concept.
# You must not modify the returned concept
# :param concept_key: key of the concept
# :param concept_id: when multiple concepts with the same key, use the id
# :return:
# """
#
# by_key = self.get_by_key(concept_key)
# if self.is_known(by_key):
# return by_key
#
# # else return by name
# by_name = self.get_by_name(concept_key)
# if self.is_known(by_name):
# return by_name
#
# return by_key # return not found for key
def get_by_key(self, concept_key, concept_id=None):
concept_key = str(concept_key) if isinstance(concept_key, BuiltinConcepts) else concept_key
return self.internal_get("key", concept_key, self.CONCEPTS_BY_KEY_ENTRY, concept_id)
+32 -2
View File
@@ -1,4 +1,4 @@
from dataclasses import dataclass
from dataclasses import dataclass, field
from enum import Enum
@@ -48,6 +48,7 @@ class TokenKind(Enum):
DEGREE = "degree" # °
WORD = "word"
EQUALSEQUALS = "=="
VAR_DEF = "__var__"
@dataclass()
@@ -58,6 +59,8 @@ class Token:
line: int
column: int
_str_value: str = field(default=None, repr=False, compare=False, hash=None)
def __repr__(self):
if self.type == TokenKind.IDENTIFIER:
value = str(self.value)
@@ -72,6 +75,23 @@ class Token:
return f"Token({value})"
@property
def str_value(self):
    """
    Return the textual value of the token. The value is computed once, then cached in '_str_value'.

    - STRING token: the value without its first and last characters (presumably the quotes)
    - KEYWORD token: the 'value' attribute of the object held in self.value
    - any other token: str(self.value)
    :return: the string form of the token value
    """
    # Compare with None rather than relying on truthiness:
    # an empty string is a legitimate cached value and must not be recomputed on every access
    if self._str_value is not None:
        return self._str_value

    if self.type == TokenKind.STRING:
        self._str_value = self.value[1:-1]
    elif self.type == TokenKind.KEYWORD:
        self._str_value = self.value.value
    else:
        self._str_value = str(self.value)

    return self._str_value
@staticmethod
def is_whitespace(token):
    """
    Tell whether the given token is a whitespace token.

    :param token: the token to test; may be None (or any falsy value) when there is no token
    :return: True only when a token is given and its type is TokenKind.WHITESPACE, False otherwise
    """
    # bool() makes the predicate always answer True/False,
    # instead of leaking None (or the falsy token itself) to the caller
    return bool(token) and token.type == TokenKind.WHITESPACE
@dataclass()
class LexerError(Exception):
@@ -101,12 +121,13 @@ class Tokenizer:
KEYWORDS = set(x.value for x in Keywords)
def __init__(self, text, parse_word=False):
def __init__(self, text, yield_eof=True, parse_word=False):
self.text = text
self.text_len = len(text)
self.column = 1
self.line = 1
self.i = 0
self.yield_eof = yield_eof
self.parse_word = parse_word
def __iter__(self):
@@ -134,6 +155,7 @@ class Tokenizer:
self.i += 1
self.column += 1
elif c == "_":
from core.concept import VARIABLE_PREFIX
if self.i + 1 < self.text_len and self.text[self.i + 1].isalpha():
identifier = self.eat_identifier(self.i)
token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER
@@ -141,6 +163,13 @@ class Tokenizer:
yield Token(token_type, value, self.i, self.line, self.column)
self.i += len(identifier)
self.column += len(identifier)
elif self.i + 7 < self.text_len and \
self.text[self.i: self.i + 7] == VARIABLE_PREFIX and \
self.text[self.i + 7].isdigit():
number = self.eat_number(self.i + 7)
yield Token(TokenKind.VAR_DEF, VARIABLE_PREFIX + number, self.i, self.line, self.column)
self.i += 7 + len(number)
self.column += 7 + len(number)
else:
yield Token(TokenKind.UNDERSCORE, "_", self.i, self.line, self.column)
self.i += 1
@@ -308,6 +337,7 @@ class Tokenizer:
else:
raise LexerError(f"Unknown token '{c}'", self.text, self.i, self.line, self.column)
if self.yield_eof:
yield Token(TokenKind.EOF, "", self.i, self.line, self.column)
def eat_concept(self, start, line, column):
+2 -2
View File
@@ -91,7 +91,7 @@ class AtomConceptParserHelper:
self.debug.append(token)
if self.expected_tokens[0] != BaseNodeParser.get_token_value(token):
if self.expected_tokens[0] != token.str_value:
self.errors.append(UnexpectedTokenErrorNode(
f"Found '{token}' while expecting '{self.expected_tokens[0]}'",
token,
@@ -119,7 +119,7 @@ class AtomConceptParserHelper:
forked.eat_concept(concept, pos)
concept_node = ConceptNode(concept, pos, pos)
expected = [BaseNodeParser.get_token_value(t) for t in Tokenizer(concept.name)][1:-1]
expected = [t.str_value for t in Tokenizer(concept.name)][1:-1]
if not expected:
# the concept is already matched
+20 -12
View File
@@ -53,9 +53,6 @@ class UnrecognizedTokensNode(LexerNode):
self.is_frozen = False
self.parenthesis_count = 0
def has_open_paren(self):
return self.parenthesis_count > 0
def add_token(self, token, pos):
if self.is_frozen:
raise Exception("The node is frozen")
@@ -78,6 +75,21 @@ class UnrecognizedTokensNode(LexerNode):
return self
def pop(self, token_kind):
    """
    Remove the last token of the node, but only when it is of the requested kind.

    When the node has no token left it is reset, otherwise its end position moves back by one.
    :param token_kind: type that the last token must have in order to be removed
    :return:
    """
    if self.is_frozen:
        raise Exception("The node is frozen")

    if not self.tokens or self.tokens[-1].type != token_kind:
        return  # nothing to remove

    self.tokens.pop()
    if self.tokens:
        self.end -= 1
    else:
        self.reset()
def has_open_paren(self):
    """
    Tell whether the parenthesis counter of this node is strictly positive.
    :return: True when parenthesis_count > 0
    """
    return 0 < self.parenthesis_count
def not_whitespace(self):
    """
    Negation of is_whitespace().
    :return: True when is_whitespace() answers False
    """
    return not self.is_whitespace()
@@ -90,6 +102,11 @@ class UnrecognizedTokensNode(LexerNode):
def is_empty(self):
    """
    Tell whether the node holds no token at all.
    :return: True when the tokens list is empty
    """
    return not self.tokens
def last_token_type(self):
    """
    Give the type of the last token held by the node.
    :return: the type of the last token, or None when the node has no token
    """
    return self.tokens[-1].type if self.tokens else None
def __eq__(self, other):
if isinstance(other, utnode):
return self.start == other.start and \
@@ -676,15 +693,6 @@ class BaseNodeParser(BaseParser):
return custom_concepts if custom else None
@staticmethod
def get_token_value(token):
if token.type == TokenKind.STRING:
return token.value[1:-1]
elif token.type == TokenKind.KEYWORD:
return token.value.value
else:
return token.value
@staticmethod
def get_concepts_by_first_keyword(context, concepts, use_sheerka=False):
"""
+189 -74
View File
@@ -1,15 +1,16 @@
from collections import namedtuple
from dataclasses import dataclass, field
from operator import attrgetter
from typing import List
from core import builtin_helpers
from core.builtin_concepts import BuiltinConcepts
from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF
from core.concept import Concept, DEFINITION_TYPE_BNF
from core.sheerka.ExecutionContext import ExecutionContext
from core.tokenizer import Token, TokenKind
from core.tokenizer import Token, TokenKind, Tokenizer
from parsers.BaseNodeParser import UnrecognizedTokensNode, ConceptNode, SourceCodeNode, SyaAssociativity, \
SourceCodeWithConceptNode, BaseNodeParser
from parsers.BaseParser import ErrorNode, UnexpectedTokenErrorNode
from parsers.BaseParser import ErrorNode
PARSERS = ["BnfNode", "AtomNode", "Python"]
@@ -88,10 +89,13 @@ class SyaConceptParserHelper:
concept: Concept
start: int # position of the token in the tokenizer (Caution, it is not token.index)
end: int = field(default=-1, repr=False, compare=False, hash=None)
expected: List[str] = field(default_factory=list, repr=False, compare=False, hash=None)
expected: List[Token] = field(default_factory=list, repr=False, compare=False, hash=None)
expected_parameters_before_first_token: int = field(default=0, repr=False, compare=False, hash=None)
last_token_before_first_token: Token = field(default=None, repr=False, compare=False, hash=None)
potential_pos: int = field(default=-1, repr=False, compare=False, hash=None)
parameters_list_at_init: list = field(default_factory=list, repr=False, compare=False, hash=None)
tokens: List[Token] = field(default_factory=list, repr=False, compare=False, hash=None) # tokens eaten
remember_whitespace: Token = field(default=None, repr=False, compare=False, hash=None)
error: str = None
def __post_init__(self):
@@ -99,17 +103,20 @@ class SyaConceptParserHelper:
if self.end == -1:
self.end = self.start
first_keyword_found = False
for name in concept.key.split():
if not name.startswith(VARIABLE_PREFIX) and not first_keyword_found:
first_keyword_found = True
first_keyword_found = None
for token in Tokenizer(concept.key, yield_eof=False):
if not first_keyword_found and token.type != TokenKind.WHITESPACE and token.type != TokenKind.VAR_DEF:
first_keyword_found = token
if first_keyword_found:
self.expected.append(name)
self.expected.append(token)
else:
self.last_token_before_first_token = token
if token.type != TokenKind.WHITESPACE:
self.expected_parameters_before_first_token += 1
self.eat_token() # remove the fist token
self.eat_token(first_keyword_found) # remove the first token
self.tokens.append(first_keyword_found)
def is_matched(self):
    """
    Tell whether everything that was expected has been consumed.
    :return: True when the 'expected' list is empty
    """
    return not self.expected
@@ -117,23 +124,38 @@ class SyaConceptParserHelper:
def is_atom(self):
    """
    Tell whether the concept stands on its own:
    no variable is declared in its metadata and nothing is left in 'expected'.
    :return: True when both conditions hold
    """
    return len(self.concept.concept.metadata.variables) == 0 and len(self.expected) == 0
def is_expected(self, token):
if self.is_matched():
def is_next(self, token):
if self.is_matched() or len(self.expected) == 0:
return False
token_value = BaseNodeParser.get_token_value(token)
# True if the next token is the one that is expected
# Or if the next token is a whitespace and the expected one is the one after
# (whitespace are sometimes not mandatory)
return token.str_value == self.expected[0].str_value or \
self.expected[0].type == TokenKind.WHITESPACE and token.str_value == self.expected[1].str_value
def is_expected(self, token):
if self.is_matched() or token.type == TokenKind.WHITESPACE:
return False
for expected in self.expected:
if not expected.startswith(VARIABLE_PREFIX) and expected == token_value:
if expected.type != TokenKind.VAR_DEF and expected.str_value == token.str_value:
return True
return False
def expected_parameters(self):
return sum(map(lambda e: e.startswith(VARIABLE_PREFIX), self.expected))
return sum(map(lambda e: e.type == TokenKind.VAR_DEF, self.expected))
def eat_token(self):
# No check, as it is used only after is_expected
def eat_token(self, until_token):
"""
Eat the expected tokens up to and including 'until_token'
:param until_token:
:return:
"""
# No check, as it is used only after is_expected() or is_next()
while self.expected[0].str_value != until_token.str_value:
del self.expected[0]
del self.expected[0]
# return True if a whole sequence of keywords is eaten
@@ -143,7 +165,10 @@ class SyaConceptParserHelper:
if len(self.expected) == 0:
return True
return self.expected[0].startswith(VARIABLE_PREFIX)
# also return True at the end of a name sequence
# ... <var0> bar baz qux <var1>
# return True after 'qux', to indicate all the parameters from <var0> must be processed
return self.expected[0].type == TokenKind.VAR_DEF
def eat_parameter(self, parameter):
if self.is_matched() and parameter == self:
@@ -153,7 +178,7 @@ class SyaConceptParserHelper:
self.error = "No more parameter expected"
return
if not self.expected[0].startswith(VARIABLE_PREFIX):
if self.expected[0].type != TokenKind.VAR_DEF:
self.error = "Parameter was not expected"
return
@@ -202,6 +227,7 @@ class InFixToPostFix:
self.errors = [] # Not quite sure that I can handle more than one error
self.debug = []
self.false_positives = [] # concepts that looks like known one, but not (for debug purpose)
self.forked = [] # use to fork InFixToPostFix when multiple parsers recognize the unrecognized_tokens
def __repr__(self):
@@ -245,7 +271,6 @@ class InFixToPostFix:
Note that when we are parsing non recognized tokens,
we consider that the parenthesis are part of the non recognized
:param token:
:param stack:
:return:
"""
return isinstance(token, Token) and token.type == TokenKind.RPAR
@@ -268,10 +293,10 @@ class InFixToPostFix:
:return:
"""
if isinstance(item, SyaConceptParserHelper) and len(item.expected) > 0 and not item.error:
if item.expected[0].startswith(VARIABLE_PREFIX):
if item.expected[0].type == TokenKind.VAR_DEF:
item.error = "Not enough suffix parameters"
else:
item.error = f"token '{item.expected[0]}' not found"
item.error = f"token '{item.expected[0].str_value}' not found"
if isinstance(item, SyaConceptParserHelper) and item.potential_pos != -1:
self.out.insert(item.potential_pos, item)
@@ -328,6 +353,16 @@ class InFixToPostFix:
).pseudo_fix_source()
return source_code
def _transform_to_unrecognized(self, parser_helper):
# an UnrecognizedTokensNode was sent to out too prematurely: take it back
if len(self.out) > 0 and isinstance(self.out[-1], UnrecognizedTokensNode):
self.unrecognized_tokens = self.out.pop()
if parser_helper.remember_whitespace:
self.unrecognized_tokens.add_token(parser_helper.remember_whitespace, parser_helper.start - 1)
for i, token in enumerate(parser_helper.tokens):
self.unrecognized_tokens.add_token(token, parser_helper.start + i)
def get_errors(self):
res = []
res.extend(self.errors)
@@ -343,28 +378,28 @@ class InFixToPostFix:
self.is_locked = False
def manage_parameters_when_new_concept(self, temp_concept_node):
def manage_parameters_when_new_concept(self, parser_helper):
"""
When a new concept is created, we need to check what to do with the parameters
that were queued
:param temp_concept_node: new concept
:param parser_helper: new concept
:return:
"""
if len(self.parameters_list) < temp_concept_node.expected_parameters_before_first_token:
if len(self.parameters_list) < parser_helper.expected_parameters_before_first_token:
# The new concept expect some prefix parameters, but there's not enough
temp_concept_node.error = "Not enough prefix parameters"
parser_helper.error = "Not enough prefix parameters"
return
if len(self.parameters_list) > temp_concept_node.expected_parameters_before_first_token:
if len(self.parameters_list) > parser_helper.expected_parameters_before_first_token:
# There are more parameters than needed by the new concept
# The others are either
# - parameters for the previous concept (if any)
# - concepts on their own
# - syntax error
# In all the cases, the only thing that matter is to pop what is expected by the new concept
for i in range(temp_concept_node.expected_parameters_before_first_token):
for i in range(parser_helper.expected_parameters_before_first_token):
self.parameters_list.pop()
temp_concept_node.parameters_list_at_init.extend(self.parameters_list)
parser_helper.parameters_list_at_init.extend(self.parameters_list)
return
# len(self.parameters_list) == temp_concept_node.expected_parameters_before_first_token
@@ -385,14 +420,18 @@ class InFixToPostFix:
:return:
"""
# manage parenthesis that didn't find any match
if self._is_lpar(self.stack[-1]):
self._add_error(ParenthesisMismatchErrorNode(self.stack[-1]))
# The parameter must be part the current concept being parsed
assert len(self._concepts()) != 0 # sanity check
current_concept = self._concepts()[-1]
while len(current_concept.expected) > 0 and current_concept.expected[0].startswith(VARIABLE_PREFIX):
while len(current_concept.expected) > 0 and current_concept.expected[0].type == TokenKind.VAR_DEF:
# eat everything that was expected
if len(self.parameters_list) == 0:
# current_concept.error = f"Failed to match parameter '{current_concept.expected[0]}'"
current_concept.error = f"Failed to match parameter '{current_concept.expected[0].str_value}'"
return
del self.parameters_list[0]
del current_concept.expected[0]
@@ -506,6 +545,11 @@ class InFixToPostFix:
if stack.associativity == SyaAssociativity.No and current.associativity == SyaAssociativity.No:
self._add_error(NoneAssociativeSequenceErrorNode(current.concept, stack_head.start, concept_node.start))
if not current.precedence:
# precedence is not set (None or zero)
# Do not apply any rule
return False
if current.associativity == SyaAssociativity.Left and current.precedence <= stack.precedence:
return True
@@ -528,9 +572,55 @@ class InFixToPostFix:
:return:
"""
def _pop_stack(c):
    """
    Pop the stack to out until the concept 'c' is on top of the stack.

    A left parenthesis met on the way stops the unstacking and is reported as a parenthesis mismatch.
    :param c: the concept being parsed (the callers pass the current SyaConceptParserHelper)
    :return: False when a parenthesis mismatch is detected, None otherwise
    """
    # The parenthesis test must look at the top of the stack, not at 'c':
    # 'c' is a concept helper, so testing it could (presumably, _is_lpar mirroring _is_rpar) never be True,
    # which made the mismatch detection below unreachable
    while self.stack[-1] != c and not self._is_lpar(self.stack[-1]):
        self.pop_stack_to_out()

    if self._is_lpar(self.stack[-1]):
        self._add_error(ParenthesisMismatchErrorNode(self.stack[-1]))
        return False

    # Manage concepts ending with long names
    if self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1].is_matched():
        self.pop_stack_to_out()
for current_concept in reversed(self._concepts()):
# Reminder (as I may lose memory again ;-)):
# it's a reversed loop to manage cases like
# if a plus b then ...
# The current concept is 'plus', but the token is 'then'
# It means that I have finished parsing the 'plus' and started the second part of the 'if'
if current_concept.is_next(token):
current_concept.end = pos
current_concept.tokens.append(token)
if current_concept.eat_token(token):
_pop_stack(current_concept)
return True
if len(current_concept.expected) > 0 and current_concept.expected[0].type != TokenKind.VAR_DEF:
if current_concept.expected[0].type == TokenKind.WHITESPACE:
# drop it. It's the case where an optional whitespace is missing
del (current_concept.expected[0])
else:
# error
# We are not parsing the concept we thought we were parsing.
# Transform the eaten tokens into unrecognized
# and discard the current SyaConceptParserHelper
# TODO: manage the pending LPAR, RPAR ?
self._transform_to_unrecognized(current_concept)
self.false_positives.append(current_concept)
self.stack.pop()
return False
if current_concept.is_expected(token):
# Fix the whitespace between var and expected if needed
# current_concept.expected[0] is '<var>'
# current_concept.expected[1] is what separate var from expected (normally a whitespace)
if current_concept.expected[1].type == TokenKind.WHITESPACE:
self.unrecognized_tokens.pop(TokenKind.WHITESPACE)
current_concept.end = pos
self.manage_unrecognized()
# manage that some clones may have been forked
@@ -550,36 +640,33 @@ class InFixToPostFix:
self.parameters_list[:]))
return True # no need to continue
while self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1].is_matched():
while self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1] != current_concept:
current = self.stack[-1]
if current.error:
self._transform_to_unrecognized(current)
self.false_positives.append(current)
self.stack.pop()
if current_concept.expected[1].type == TokenKind.WHITESPACE:
self.unrecognized_tokens.pop(TokenKind.WHITESPACE)
self.manage_unrecognized()
# manage that some clones may have been forked
for forked in self.forked:
forked.handle_expected_token(token, pos)
else:
self.pop_stack_to_out()
self.manage_parameters()
if current_concept.eat_token():
while self.stack[-1] != current_concept and not self._is_lpar(current_concept):
self.pop_stack_to_out()
# maybe eat whitespace that was between <var> and expected token
if current_concept.expected[0].type == TokenKind.WHITESPACE:
del current_concept.expected[0]
if self._is_lpar(self.stack[-1]):
self._add_error(ParenthesisMismatchErrorNode(self.stack[-1]))
return False
# Manage concepts ending with long names
if self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1].is_matched():
self.pop_stack_to_out()
if current_concept.eat_token(token):
_pop_stack(current_concept)
return True
# else:
# if token.type != TokenKind.WHITESPACE:
# # hack, because whitespaces are not correctly parsed in self.expected
# # KSI 2020/04/25
# # I no longer understand why we are in a loop (the reverse one)
# # if we are parsing a concept and the expected token does not match
# # The whole class should be in error
# self._add_error(UnexpectedTokenErrorNode(
# f"Failed to parse '{current_concept.concept.concept}'",
# token, current_concept.expected))
# return False
return False
def eat_token(self, token, pos):
@@ -692,10 +779,11 @@ class InFixToPostFix:
return False
def eat_concept(self, sya_concept_def, pos):
def eat_concept(self, sya_concept_def, token, pos):
"""
a concept is found
:param sya_concept_def:
:param token:
:param pos:
:return:
"""
@@ -704,37 +792,43 @@ class InFixToPostFix:
return
self.debug.append(sya_concept_def)
temp_concept_node = SyaConceptParserHelper(sya_concept_def, pos)
parser_helper = SyaConceptParserHelper(sya_concept_def, pos)
if self.unrecognized_tokens.last_token_type() == TokenKind.WHITESPACE:
parser_helper.remember_whitespace = self.unrecognized_tokens.tokens[-1]
if Token.is_whitespace(parser_helper.last_token_before_first_token):
self.unrecognized_tokens.pop(TokenKind.WHITESPACE)
# First, try to recognize the tokens that are waiting
self.manage_unrecognized()
for forked in self.forked:
# manage the fact that some clone may have been forked
forked.eat_concept(sya_concept_def, pos)
forked.eat_concept(sya_concept_def, token, pos)
# then, check if this new concept is linked to the previous ones
# ie, is the previous concept fully matched ?
if temp_concept_node.expected_parameters_before_first_token == 0:
if parser_helper.expected_parameters_before_first_token == 0:
# => does not expect pending parameter (it's suffixed concept)
while self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1].potential_pos != -1:
# => previous seems to have everything it needs in the parameter list
self.pop_stack_to_out()
if temp_concept_node.is_atom():
self._put_to_out(temp_concept_node.fix_concept())
if parser_helper.is_atom():
self._put_to_out(parser_helper.fix_concept())
else:
# call shunting yard algorithm
while self.i_can_pop(temp_concept_node):
while self.i_can_pop(parser_helper):
self.pop_stack_to_out()
if temp_concept_node.is_matched():
if parser_helper.is_matched():
# case of a prefix concept which has found happiness with self.parameters_list
# directly put it in out
self.manage_parameters_when_new_concept(temp_concept_node)
self._put_to_out(temp_concept_node.fix_concept())
self.manage_parameters_when_new_concept(parser_helper)
self._put_to_out(parser_helper.fix_concept())
else:
self.stack.append(temp_concept_node)
self.manage_parameters_when_new_concept(temp_concept_node)
self.stack.append(parser_helper)
self.manage_parameters_when_new_concept(parser_helper)
def eat_unrecognized(self, token, pos):
"""
@@ -762,17 +856,33 @@ class InFixToPostFix:
if len(self.stack) == 0 and len(self.out) == 0:
return # no need to pop the buffer, as no concept is found
while len(self.stack) > 0:
parser_helper = self.stack[-1]
# validate parenthesis
if self._is_lpar(parser_helper) or self._is_rpar(parser_helper):
self._add_error(ParenthesisMismatchErrorNode(parser_helper))
return None
self.manage_unrecognized()
for forked in self.forked:
# manage that some clones may have been forked
forked.finalize()
while len(self.stack) > 0:
if self._is_lpar(self.stack[-1]) or self._is_rpar(self.stack[-1]):
self._add_error(ParenthesisMismatchErrorNode(self.stack[-1]))
return None
failed_to_match = sum(map(lambda e: e.type != TokenKind.VAR_DEF, parser_helper.expected))
if failed_to_match > 0:
# didn't manage to read all tokens.
# Transform them into unrecognized
self._transform_to_unrecognized(parser_helper)
self.false_positives.append(parser_helper)
self.stack.pop() # discard the parser helper
else:
self.pop_stack_to_out() # process it
self.pop_stack_to_out()
self.manage_unrecognized()
for forked in self.forked:
# manage that some clones may have been forked
forked.finalize()
def clone(self):
clone = InFixToPostFix(self.context)
@@ -975,7 +1085,7 @@ class SyaNodeParser(BaseNodeParser):
try:
if token.type in (TokenKind.LPAR, TokenKind.RPAR):
# little optim, no need to get the concept when parenthesis
# little optim, no need to lock, unlock or get the concept when parenthesis
for infix_to_postfix in res:
infix_to_postfix.eat_token(token, self.pos)
continue
@@ -992,7 +1102,7 @@ class SyaNodeParser(BaseNodeParser):
if len(concepts) == 1:
for infix_to_postfix in res:
infix_to_postfix.eat_concept(concepts[0], self.pos)
infix_to_postfix.eat_concept(concepts[0], token, self.pos)
continue
# make the cartesian product
@@ -1001,7 +1111,7 @@ class SyaNodeParser(BaseNodeParser):
for concept in concepts:
clone = infix_to_postfix.clone()
temp_res.append(clone)
clone.eat_concept(concept, self.pos)
clone.eat_concept(concept, token, self.pos)
res = temp_res
finally:
@@ -1100,6 +1210,11 @@ class SyaNodeParser(BaseNodeParser):
to_insert = item
sequence.insert(0, to_insert)
if has_unrecognized:
# Manage some sick cases where a missing parenthesis messes up the order of the sequence
# example "foo bar(one plus two"
sequence.sort(key=attrgetter("start"))
ret.append(
self.sheerka.ret(
self.name,
-912
View File
@@ -1,912 +0,0 @@
# #####################################################################################################
# # This implementation of the parser is highly inspired by the arpeggio project (https://github.com/textX/Arpeggio)
# # I don't directly use the project, but it helped me figure out
# # what to do.
# # Dejanović I., Milosavljević G., Vaderna R.:
# # Arpeggio: A flexible PEG parser for Python,
# # Knowledge-Based Systems, 2016, 95, 71 - 74, doi:10.1016/j.knosys.2015.12.004
# #####################################################################################################
# from collections import namedtuple
# from dataclasses import dataclass
# from collections import defaultdict
# from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
# from core.concept import Concept, ConceptParts, DoNotResolve
# from core.tokenizer import TokenKind, Tokenizer, Token
# from parsers.BaseNodeParser import LexerNode, GrammarErrorNode, ConceptNode, UnrecognizedTokensNode
# from parsers.BaseParser import BaseParser, ErrorNode
# import core.utils
#
#
# class NonTerminalNode(LexerNode):
# """
# Returned by the BnfNodeParser
# """
#
# def __init__(self, parsing_expression, start, end, tokens, children=None):
# super().__init__(start, end, tokens)
# self.parsing_expression = parsing_expression
# self.children = children
#
# def __repr__(self):
# name = self.parsing_expression.rule_name or self.parsing_expression.__class__.__name__
# if len(self.children) > 0:
# sub_names = "(" + ",".join([repr(child) for child in self.children]) + ")"
# else:
# sub_names = ""
# return name + sub_names
#
# def __eq__(self, other):
# if not isinstance(other, NonTerminalNode):
# return False
#
# return self.parsing_expression == other.parsing_expression and \
# self.start == other.start and \
# self.end == other.end and \
# self.children == other.children
#
# def __hash__(self):
# return hash((self.parsing_expression, self.start, self.end, self.children))
#
#
# class TerminalNode(LexerNode):
# """
# Returned by the BnfNodeParser
# """
#
# def __init__(self, parsing_expression, start, end, value):
# super().__init__(start, end, source=value)
# self.parsing_expression = parsing_expression
# self.value = value
#
# def __repr__(self):
# name = self.parsing_expression.rule_name or ""
# return name + f"'{self.value}'"
#
# def __eq__(self, other):
# if not isinstance(other, TerminalNode):
# return False
#
# return self.parsing_expression == other.parsing_expression and \
# self.start == other.start and \
# self.end == other.end and \
# self.value == other.value
#
# def __hash__(self):
# return hash((self.parsing_expression, self.start, self.end, self.value))
#
#
# @dataclass()
# class UnknownConceptNode(ErrorNode):
# concept_key: str
#
#
# @dataclass()
# class TooManyConceptNode(ErrorNode):
# concept_key: str
#
#
# class ParsingExpression:
# def __init__(self, *args, **kwargs):
# self.elements = args
#
# nodes = kwargs.get('nodes', [])
# if not hasattr(nodes, '__iter__'):
# nodes = [nodes]
# self.nodes = nodes
#
# self.rule_name = kwargs.get('rule_name', '')
#
# def __eq__(self, other):
# if not isinstance(other, ParsingExpression):
# return False
#
# return self.rule_name == other.rule_name and self.elements == other.elements
#
# def __hash__(self):
# return hash((self.rule_name, self.elements))
#
# def parse(self, parser):
# return self._parse(parser)
#
# def add_rule_name_if_needed(self, text):
# return text + "=" + self.rule_name if self.rule_name else text
#
#
# class ConceptExpression(ParsingExpression):
# """
# Will match a concept
# It used only for rule definition
#
# When the grammar is created, it is replaced by the actual concept
# """
#
# def __init__(self, concept, rule_name=""):
# super().__init__(rule_name=rule_name)
# self.concept = concept
#
# def __repr__(self):
# return self.add_rule_name_if_needed(f"{self.concept}")
#
# def __eq__(self, other):
# if not super().__eq__(other):
# return False
#
# if not isinstance(other, ConceptExpression):
# return False
#
# if isinstance(self.concept, Concept):
# return self.concept.name == other.concept.name
#
# # when it's only the name of the concept
# return self.concept == other.concept
#
# def __hash__(self):
# return hash((self.concept, self.rule_name))
#
# @staticmethod
# def get_parsing_expression_from_name(name):
# tokens = Tokenizer(name)
# nodes = [StrMatch(core.utils.strip_quotes(token.value)) for token in list(tokens)[:-1]]
# if len(nodes) == 1:
# return nodes[0]
# else:
# sequence = Sequence(nodes)
# sequence.nodes = nodes
# return sequence
#
# def _parse(self, parser):
# to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept
# if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
# return None
#
# self.concept = to_match # Memoize
#
# if to_match not in parser.concepts_grammars:
# # Try to match the concept using its name
# expr = self.get_parsing_expression_from_name(to_match.name)
# node = expr.parse(parser)
# else:
# node = parser.concepts_grammars[to_match].parse(parser)
#
# if node is None:
# return None
#
# return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node])
#
#
# class ConceptGroupExpression(ConceptExpression):
# def _parse(self, parser):
# to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept
# if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
# return None
#
# self.concept = to_match # Memoize
#
# if to_match not in parser.concepts_grammars:
# concepts_in_group = parser.sheerka.get_set_elements(parser.context, self.concept)
# nodes = [ConceptExpression(c, rule_name=c.name) for c in concepts_in_group]
# expr = OrderedChoice(nodes)
# expr.nodes = nodes
# node = expr.parse(parser)
# else:
# node = parser.concepts_grammars[to_match].parse(parser)
#
# if node is None:
# return None
#
# return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node])
#
#
# class Sequence(ParsingExpression):
# """
# Will match sequence of parser expressions in exact order they are defined.
# """
#
# def _parse(self, parser):
# init_pos = parser.pos
# end_pos = parser.pos
#
# children = []
# for e in self.nodes:
# node = e.parse(parser)
# if node is None:
# return None
# else:
# if node.end != -1: # because returns -1 when no match
# children.append(node)
# end_pos = node.end
#
# return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)
#
# def __repr__(self):
# to_str = ", ".join(repr(n) for n in self.elements)
# return self.add_rule_name_if_needed(f"({to_str})")
#
#
# class OrderedChoice(ParsingExpression):
# """
# Will match one among multiple
# It will stop at the first match (so the order of definition is important)
# """
#
# def _parse(self, parser):
# init_pos = parser.pos
#
# for e in self.nodes:
# node = e.parse(parser)
# if node:
# return NonTerminalNode(self, init_pos, node.end, parser.tokens[init_pos: node.end + 1], [node])
#
# parser.seek(init_pos) # backtrack
#
# return None
#
# def __repr__(self):
# to_str = "| ".join(repr(n) for n in self.elements)
# return self.add_rule_name_if_needed(f"({to_str})")
#
#
# class Optional(ParsingExpression):
# """
# Will match or not the elements
# if many matches, will choose longest one
# If you need order, use Optional(OrderedChoice)
# """
#
# def _parse(self, parser):
# init_pos = parser.pos
# selected_node = NonTerminalNode(self, parser.pos, -1, [], []) # means that nothing is found
#
# for e in self.nodes:
# node = e.parse(parser)
# if node:
# if node.end > selected_node.end:
# selected_node = NonTerminalNode(
# self,
# node.start,
# node.end,
# parser.tokens[node.start: node.end + 1],
# [node])
#
# parser.seek(init_pos) # backtrack
#
# if selected_node.end != -1:
# parser.seek(selected_node.end)
# parser.next_token() # eat the tokens found
#
# return selected_node
#
# def __repr__(self):
# if len(self.elements) == 1:
# return f"{self.elements[0]}?"
# else:
# to_str = ", ".join(repr(n) for n in self.elements)
# return self.add_rule_name_if_needed(f"({to_str})?")
#
#
# class Repetition(ParsingExpression):
# """
# Base class for all repetition-like parser expressions (?,*,+)
# Args:
# eolterm(bool): Flag that indicates that end of line should
# terminate repetition match.
# """
#
# def __init__(self, *elements, **kwargs):
# super(Repetition, self).__init__(*elements, **kwargs)
# self.sep = kwargs.get('sep', None)
#
#
# class ZeroOrMore(Repetition):
# """
# ZeroOrMore will try to match parser expression specified zero or more
# times. It will never fail.
# """
#
# def _parse(self, parser):
# init_pos = parser.pos
# end_pos = -1
# children = []
#
# while True:
# current_pos = parser.pos
#
# # maybe eat the separator if needed
# if self.sep and children:
# sep_result = self.sep.parse(parser)
# if sep_result is None:
# parser.seek(current_pos)
# break
#
# # eat the ZeroOrMore
# node = self.nodes[0].parse(parser)
# if node is None:
# parser.seek(current_pos)
# break
# else:
# if node.end != -1: # because returns -1 when no match
# children.append(node)
# end_pos = node.end
#
# if len(children) == 0:
# return NonTerminalNode(self, init_pos, -1, [], [])
#
# return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)
#
# def __repr__(self):
# to_str = ", ".join(repr(n) for n in self.elements)
# return self.add_rule_name_if_needed(f"({to_str})*")
#
#
# class OneOrMore(Repetition):
# """
# OneOrMore will try to match parser expression specified one or more times.
# """
#
# def _parse(self, parser):
# init_pos = parser.pos
# end_pos = -1
# children = []
#
# while True:
# current_pos = parser.pos
#
# # maybe eat the separator if needed
# if self.sep and children:
# sep_result = self.sep.parse(parser)
# if sep_result is None:
# parser.seek(current_pos)
# break
#
# # eat the OneOrMore
# node = self.nodes[0].parse(parser)
# if node is None:
# parser.seek(current_pos)
# break
# else:
# if node.end != -1: # because returns -1 when no match
# children.append(node)
# end_pos = node.end
#
# if len(children) == 0: # if nothing is found, it's an error
# return None
#
# return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)
#
# def __repr__(self):
# to_str = ", ".join(repr(n) for n in self.elements)
# return self.add_rule_name_if_needed(f"({to_str})+")
#
#
# class UnorderedGroup(Repetition):
# """
# Will try to match all of the parsing expression in any order.
# """
#
# def _parse(self, parser):
# raise NotImplementedError()
#
# # def __repr__(self):
# # to_str = ", ".join(repr(n) for n in self.elements)
# # return f"({to_str})#"
#
#
# class Match(ParsingExpression):
# """
# Base class for all classes that will try to match something from the input.
# """
#
# def __init__(self, rule_name, root=False):
# super(Match, self).__init__(rule_name=rule_name, root=root)
#
# def parse(self, parser):
# result = self._parse(parser)
# return result
#
#
# class StrMatch(Match):
# """
# Matches a literal
# """
#
# def __init__(self, to_match, rule_name="", ignore_case=True):
# super(Match, self).__init__(rule_name=rule_name)
# self.to_match = to_match
# self.ignore_case = ignore_case
#
# def __repr__(self):
# return self.add_rule_name_if_needed(f"'{self.to_match}'")
#
# def __eq__(self, other):
# if not super().__eq__(other):
# return False
#
# if not isinstance(other, StrMatch):
# return False
#
# return self.to_match == other.to_match and self.ignore_case == other.ignore_case
#
# def _parse(self, parser):
# token = parser.get_token()
# m = str(token.value).lower() == self.to_match.lower() if self.ignore_case \
# else token.value == self.to_match
#
# if m:
# node = TerminalNode(self, parser.pos, parser.pos, token.value)
# parser.next_token()
# return node
#
# return None
#
#
# class BnfNodeParser(BaseParser):
# def __init__(self, **kwargs):
# super().__init__("BnfNode_old", 50)
# self.enabled = False
# if 'grammars' in kwargs:
# self.concepts_grammars = kwargs.get("grammars")
# elif 'sheerka' in kwargs:
# self.concepts_grammars = kwargs.get("sheerka").concepts_grammars
# else:
# self.concepts_grammars = {}
#
# self.ignore_case = True
#
# self.token = None
# self.pos = -1
# self.tokens = None
#
# self.context = None
# self.text = None
# self.sheerka = None
#
# def add_error(self, error, next_token=True):
# self.error_sink.append(error)
# if next_token:
# self.next_token()
# return error
#
# def reset_parser(self, context, text):
# self.context = context
# self.sheerka = context.sheerka
# self.text = text
#
# try:
# self.tokens = list(self.get_input_as_tokens(text))
# except core.tokenizer.LexerError as e:
# self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
# return False
#
# self.token = None
# self.pos = -1
# self.next_token(False)
# return True
#
# def get_token(self) -> Token:
# return self.token
#
# def next_token(self, skip_whitespace=True):
# if self.token and self.token.type == TokenKind.EOF:
# return False
#
# self.pos += 1
# self.token = self.tokens[self.pos]
#
# if skip_whitespace:
# while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
# self.pos += 1
# self.token = self.tokens[self.pos]
#
# return self.token.type != TokenKind.EOF
#
# def seek(self, pos):
# self.pos = pos
# self.token = self.tokens[self.pos]
# return True
#
# def rewind(self, offset, skip_whitespace=True):
# self.pos += offset
# self.token = self.tokens[self.pos]
#
# if skip_whitespace:
# while self.pos > 0 and (self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE):
# self.pos -= 1
# self.token = self.tokens[self.pos]
#
# def initialize(self, context, concepts_definitions):
# """
# Adds a bunch of concepts, and how they can be recognized
# :param context: execution context
# :param concepts_definitions: dictionary of concept, concept_definition
# :return:
# """
#
# self.context = context
# self.sheerka = context.sheerka
# concepts_to_resolve = set()
#
# for concept, concept_def in concepts_definitions.items():
# # ## Gets the grammars
# context.log(f"Resolving grammar for '{concept}'", context.who)
# concept.init_key() # make sure that the key is initialized
# grammar = self.get_model(concept_def, concepts_to_resolve)
# self.concepts_grammars[concept] = grammar
#
# if self.has_error:
# return self.sheerka.ret(self.name, False, self.error_sink)
#
# # ## Removes concepts with infinite recursions
# concepts_to_remove = self.detect_infinite_recursion(concepts_to_resolve)
# for concept in concepts_to_remove:
# concepts_to_resolve.remove(concept)
# del self.concepts_grammars[concept]
#
# if self.has_error:
# return self.sheerka.ret(self.name, False, self.error_sink)
# else:
# return self.sheerka.ret(self.name, True, self.concepts_grammars)
#
# def get_concept(self, concept_name):
# if concept_name in self.context.concepts:
# return self.context.concepts[concept_name]
# return self.sheerka.get_by_key(concept_name)
#
# def get_model(self, concept_def, concepts_to_resolve):
#
# # TODO
# # inner_get_model must not modify the initial ParsingExpression
# # A copy must be created
# def inner_get_model(expression):
# if isinstance(expression, Concept):
# if self.sheerka.isaset(self.context, expression):
# ret = ConceptGroupExpression(expression, rule_name=expression.name)
# else:
# ret = ConceptExpression(expression, rule_name=expression.name)
# concepts_to_resolve.add(expression)
# elif isinstance(expression, ConceptExpression): # it includes ConceptGroupExpression
# if expression.rule_name is None or expression.rule_name == "":
# expression.rule_name = expression.concept.name if isinstance(expression.concept, Concept) \
# else expression.concept
# if isinstance(expression.concept, str):
# concept = self.get_concept(expression.concept)
# if self.sheerka.is_known(concept):
# expression.concept = concept
# concepts_to_resolve.add(expression.concept)
# ret = expression
# elif isinstance(expression, str):
# ret = StrMatch(expression, ignore_case=self.ignore_case)
# elif isinstance(expression, StrMatch):
# ret = expression
# if ret.ignore_case is None:
# ret.ignore_case = self.ignore_case
# elif isinstance(expression, Sequence) or \
# isinstance(expression, OrderedChoice) or \
# isinstance(expression, ZeroOrMore) or \
# isinstance(expression, OneOrMore) or \
# isinstance(expression, Optional):
# ret = expression
# ret.nodes = [inner_get_model(e) for e in ret.elements]
# else:
# ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False)
#
# # Translate separator expression.
# if isinstance(expression, Repetition) and expression.sep:
# expression.sep = inner_get_model(expression.sep)
#
# return ret
#
# model = inner_get_model(concept_def)
#
# return model
#
# def detect_infinite_recursion(self, concepts_to_resolve):
#
# # infinite recursion matcher
# def _is_infinite_recursion(ref_concept, node):
# if isinstance(node, ConceptExpression):
# if node.concept == ref_concept:
# return True
#
# if isinstance(node.concept, str):
# to_match = self.get_concept(node.concept)
# if self.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
# return False
# else:
# to_match = node.concept
#
# if to_match not in self.concepts_grammars:
# return False
#
# return _is_infinite_recursion(ref_concept, self.concepts_grammars[to_match])
#
# if isinstance(node, OrderedChoice):
# return _is_infinite_recursion(ref_concept, node.nodes[0])
#
# if isinstance(node, Sequence):
# for node in node.nodes:
# if _is_infinite_recursion(ref_concept, node):
# return True
# return False
#
# return False
#
# removed_concepts = []
# for e in concepts_to_resolve:
# if isinstance(e, str):
# e = self.get_concept(e)
# if self.sheerka.isinstance(e, BuiltinConcepts.UNKNOWN_CONCEPT):
# continue
#
# if e not in self.concepts_grammars:
# continue
#
# to_resolve = self.concepts_grammars[e]
# if _is_infinite_recursion(e, to_resolve):
# removed_concepts.append(e)
# return removed_concepts
#
# def parse(self, context, parser_input):
# if parser_input == "":
# return context.sheerka.ret(
# self.name,
# False,
# context.sheerka.new(BuiltinConcepts.IS_EMPTY)
# )
#
# if not self.reset_parser(context, parser_input):
# return self.sheerka.ret(
# self.name,
# False,
# context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
#
# concepts_found = [[]]
# unrecognized_tokens = None
# has_unrecognized = False
#
# # actually list of list
# # The first dimension is the number of possibilities found
# # The second dimension is the number of concepts found, under one possibility
# #
# # Example 1
# # concept foo : 'one' 'two'
# # concept bar : 'one' 'two'
# # input 'one two' -> will produce two possibilities (foo and bar).
# #
# # Example 2
# # concept foo : 'one'
# # concept bar : 'two'
# # input 'one two' -> will produce one possibility which is (foo, bar) (foo then bar)
#
# while True:
# init_pos = self.pos
# res = []
#
# for concept, grammar in self.concepts_grammars.items():
# self.seek(init_pos)
# node = grammar.parse(self) # a node is TerminalNode or NonTerminalNode
# if node is not None and node.end != -1:
# updated_concept = self.finalize_concept(context.sheerka, concept, node)
# concept_node = ConceptNode(
# updated_concept,
# node.start,
# node.end,
# self.tokens[node.start: node.end + 1],
# None,
# node)
# res.append(concept_node)
#
# if len(res) == 0: # not recognized
# self.seek(init_pos)
# if unrecognized_tokens:
# unrecognized_tokens.add_token(self.get_token(), init_pos)
# else:
# unrecognized_tokens = UnrecognizedTokensNode(init_pos, init_pos, [self.get_token()])
#
# if not self.next_token(False):
# break
#
# else: # some concepts are recognized
# if unrecognized_tokens and unrecognized_tokens.not_whitespace():
# unrecognized_tokens.fix_source()
# concepts_found = core.utils.product(concepts_found, [unrecognized_tokens])
# has_unrecognized = True
# unrecognized_tokens = None
#
# res = self.get_bests(res) # only keep the concepts that eat the more tokens
# concepts_found = core.utils.product(concepts_found, res)
#
# # loop
# self.seek(res[0].end)
# if not self.next_token(False):
# break
#
# # Fix the source for unrecognized tokens
# if unrecognized_tokens and unrecognized_tokens.not_whitespace():
# unrecognized_tokens.fix_source()
# concepts_found = core.utils.product(concepts_found, [unrecognized_tokens])
# has_unrecognized = True
#
# # else
# # returns as many ReturnValue than choices found
# ret = []
# for choice in concepts_found:
# ret.append(
# self.sheerka.ret(
# self.name,
# not has_unrecognized,
# self.sheerka.new(
# BuiltinConcepts.PARSER_RESULT,
# parser=self,
# source=parser_input,
# body=choice,
# try_parsed=choice)))
#
# if len(ret) == 1:
# self.log_result(context, parser_input, ret[0])
# return ret[0]
# else:
# self.log_multiple_results(context, parser_input, ret)
# return ret
#
# def finalize_concept(self, sheerka, template, underlying, init_empty_body=True):
# """
# Updates the properties of the concept
# Goes in recursion if the property is a concept
# """
#
# # this cache is to make sure that we return the same concept for the same ConceptExpression
# _underlying_value_cache = {}
#
# def _add_prop(_concept, prop_name, value):
# """
# Adds a new entry,
# makes a list if the property already exists
# """
# if prop_name not in _concept.compiled or _concept.compiled[prop_name] is None:
# # new entry
# _concept.compiled[prop_name] = value
# else:
# # make a list if there was a value
# previous_value = _concept.compiled[prop_name]
# if isinstance(previous_value, list):
# previous_value.append(value)
# else:
# new_value = [previous_value, value]
# _concept.compiled[prop_name] = new_value
#
# def _look_for_concept_match(_underlying):
# """
# At some point, there is either an StrMatch or a ConceptMatch,
# that allowed the recognition.
# Look for the ConceptMatch, with recursion if needed
# """
# if isinstance(_underlying.parsing_expression, ConceptExpression):
# return _underlying
#
# if not isinstance(_underlying, NonTerminalNode):
# return None
#
# if len(_underlying.children) != 1:
# return None
#
# return _look_for_concept_match(_underlying.children[0])
#
# def _get_underlying_value(_underlying):
# concept_match_node = _look_for_concept_match(_underlying)
# if concept_match_node:
# # the value is a concept
# if id(concept_match_node) in _underlying_value_cache:
# result = _underlying_value_cache[id(concept_match_node)]
# else:
# ref_tpl = concept_match_node.parsing_expression.concept
# result = self.finalize_concept(sheerka, ref_tpl, concept_match_node.children[0], init_empty_body)
# _underlying_value_cache[id(concept_match_node)] = result
# else:
# # the value is a string
# result = DoNotResolve(_underlying.source)
#
# return result
#
# def _process_rule_name(_concept, _underlying):
# if _underlying.parsing_expression.rule_name:
# value = _get_underlying_value(_underlying)
# _add_prop(_concept, _underlying.parsing_expression.rule_name, value)
# _concept.metadata.need_validation = True
#
# if isinstance(_underlying, NonTerminalNode):
# for child in _underlying.children:
# _process_rule_name(_concept, child)
#
# key = (template.key, template.id) if template.id else template.key
# concept = sheerka.new(key)
# if init_empty_body and concept.metadata.body is None:
# value = _get_underlying_value(underlying)
# concept.compiled[ConceptParts.BODY] = value
# if underlying.parsing_expression.rule_name:
# _add_prop(concept, underlying.parsing_expression.rule_name, value)
# # KSI : Why don't we set concept.metadata.need_validation to True ?
#
# if isinstance(underlying, NonTerminalNode):
# for node in underlying.children:
# _process_rule_name(concept, node)
#
# return concept
#
# def encode_grammar(self, grammar):
# """
# Transform the grammar into something that can easily be serialized
# :param grammar:
# :return:
# """
#
# def _encode(expression):
# if isinstance(expression, StrMatch):
# res = f"'{expression.to_match}'"
#
# elif isinstance(expression, ConceptExpression):
# res = core.utils.str_concept(expression.concept)
#
# elif isinstance(expression, Sequence):
# res = "(" + " ".join(_encode(c) for c in expression.nodes) + ")"
#
# elif isinstance(expression, OrderedChoice):
# res = "(" + "|".join(_encode(c) for c in expression.nodes) + ")"
#
# elif isinstance(expression, Optional):
# res = _encode(expression.nodes[0]) + "?"
#
# elif isinstance(expression, ZeroOrMore):
# res = _encode(expression.nodes[0]) + "*"
#
# elif isinstance(expression, OneOrMore):
# res = _encode(expression.nodes[0]) + "+"
#
# if expression.rule_name:
# res += "=" + expression.rule_name
#
# return res
#
# result = {}
# for k, v in grammar.items():
# key = core.utils.str_concept(k)
# value = _encode(v)
# result[key] = value
# return result
#
# @staticmethod
# def get_bests(results):
# """
# Returns the result that is the longest
# :param results:
# :return:
# """
# by_end_pos = defaultdict(list)
# for result in results:
# by_end_pos[result.end].append(result)
#
# return by_end_pos[max(by_end_pos)]
#
#
# class ParsingExpressionVisitor:
# """
# visit ParsingExpression
# """
#
# def visit(self, parsing_expression):
# name = parsing_expression.__class__.__name__
#
# method = 'visit_' + name
# visitor = getattr(self, method, self.generic_visit)
# return visitor(parsing_expression)
#
# def generic_visit(self, parsing_expression):
# if hasattr(self, "visit_all"):
# self.visit_all(parsing_expression)
#
# for node in parsing_expression.elements:
# if isinstance(node, Concept):
# self.visit(ConceptExpression(node.key or node.name))
# elif isinstance(node, str):
# self.visit(StrMatch(node))
# else:
# self.visit(node)
-108
View File
@@ -1,108 +0,0 @@
# # try to match something like
# # ConceptNode 'plus' ConceptNode
# #
# # Replaced by SyaNodeParser
# from core.builtin_concepts import BuiltinConcepts
# from core.tokenizer import TokenKind, Token
# from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode
# from parsers.BaseParser import BaseParser
# from parsers.MultipleConceptsParser import MultipleConceptsParser
# from core.concept import VARIABLE_PREFIX
#
# multiple_concepts_parser = MultipleConceptsParser()
#
#
# class ConceptsWithConceptsParser(BaseParser):
# def __init__(self, **kwargs):
# super().__init__("ConceptsWithConcepts", 25)
# self.enabled = False
#
# @staticmethod
# def get_tokens(nodes):
# tokens = []
#
# for node in nodes:
# if isinstance(node, ConceptNode):
# index, line, column = node.tokens[0].index, node.tokens[0].line, node.tokens[0].column
# tokens.append(Token(TokenKind.CONCEPT, node.concept, index, line, column))
# else:
# for token in node.tokens:
# if token.type == TokenKind.EOF:
# break
# elif token.type in (TokenKind.NEWLINE, TokenKind.WHITESPACE):
# continue
# else:
# tokens.append(token)
#
# return tokens
#
# @staticmethod
# def get_key(nodes):
# key = ""
# index = 0
# for node in nodes:
# if key:
# key += " "
#
# if isinstance(node, UnrecognizedTokensNode):
# key += node.source.strip()
# else:
# key += f"{VARIABLE_PREFIX}{index}"
# index += 1
#
# return key
#
# def finalize_concept(self, context, concept, nodes):
# index = 0
# for node in nodes:
#
# if isinstance(node, ConceptNode):
# prop_name = list(concept.props.keys())[index]
# concept.compiled[prop_name] = node.concept
# context.log(
# f"Setting property '{prop_name}='{node.concept}'.",
# self.name)
# index += 1
# elif isinstance(node, SourceCodeNode):
# prop_name = list(concept.props.keys())[index]
# sheerka = context.sheerka
# value = sheerka.new(BuiltinConcepts.PARSER_RESULT, parser=self, source=node.source, body=node.node)
# concept.compiled[prop_name] = [context.sheerka.ret(self.name, True, value)]
# context.log(
# f"Setting property '{prop_name}'='Python({node.source})'.",
# self.name)
# index += 1
#
# return concept
#
# def parse(self, context, parser_input):
# sheerka = context.sheerka
# nodes = self.get_input_as_lexer_nodes(parser_input, multiple_concepts_parser)
# if not nodes:
# return None
#
# concept_key = self.get_key(nodes)
# concept = sheerka.new(concept_key)
# if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
# return sheerka.ret(
# self.name,
# False,
# sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.body))
#
# concepts = concept if hasattr(concept, "__iter__") else [concept]
# for concept in concepts:
# self.finalize_concept(context, concept, nodes)
#
# res = []
# for concept in concepts:
# res.append(sheerka.ret(
# self.name,
# True,
# sheerka.new(
# BuiltinConcepts.PARSER_RESULT,
# parser=self,
# source=parser_input.source,
# body=concept,
# try_parsed=None)))
#
# return res[0] if len(res) == 1 else res
-163
View File
@@ -1,163 +0,0 @@
# # to be replaced by SyaNodeParser
# import ast
#
# from core.builtin_concepts import BuiltinConcepts
# from core.tokenizer import TokenKind
# from parsers.BaseNodeParser import SourceCodeNode
# from parsers.BaseParser import BaseParser
# from parsers.BnfNodeParser import BnfNodeParser, UnrecognizedTokensNode, ConceptNode
# import core.utils
# from parsers.PythonParser import PythonParser
#
# concept_lexer_parser = BnfNodeParser()
#
#
# class MultipleConceptsParser(BaseParser):
# """
# Parser that will take the result of BnfNodeParser and
# try to resolve the unrecognized tokens token by token
#
# It is a success when it returns a list ConceptNode exclusively
# """
#
# def __init__(self, **kwargs):
# BaseParser.__init__(self, "MultipleConcepts", 45)
# self.enabled = False
#
# @staticmethod
# def finalize(nodes_found, unrecognized_tokens):
# if not unrecognized_tokens:
# return nodes_found, unrecognized_tokens
#
# unrecognized_tokens.fix_source()
# if unrecognized_tokens.not_whitespace():
# nodes_found = core.utils.product(nodes_found, [unrecognized_tokens])
#
# return nodes_found, None
#
# @staticmethod
# def create_or_add(unrecognized_tokens, token, index):
# if unrecognized_tokens:
# unrecognized_tokens.add_token(token, index)
# else:
# unrecognized_tokens = UnrecognizedTokensNode(index, index, [token])
# return unrecognized_tokens
#
# def parse(self, context, parser_input):
# sheerka = context.sheerka
# nodes = self.get_input_as_lexer_nodes(parser_input, concept_lexer_parser)
# if not nodes:
# return None
#
# nodes_found = [[]]
# concepts_only = True
#
# for node in nodes:
# if isinstance(node, UnrecognizedTokensNode):
# unrecognized_tokens = None
# i = 0
#
# while i < len(node.tokens):
#
# token_index = node.start + i
# token = node.tokens[i]
#
# concepts_nodes = self.get_concepts_nodes(context, token_index, token)
# if concepts_nodes is not None:
# nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
# nodes_found = core.utils.product(nodes_found, concepts_nodes)
# i += 1
# continue
#
# source_code_node = self.get_source_code_node(context, token_index, node.tokens[i:])
# if source_code_node:
# nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
# nodes_found = core.utils.product(nodes_found, [source_code_node])
# i += len(source_code_node.tokens)
# continue
#
# # not a concept nor some source code
# unrecognized_tokens = self.create_or_add(unrecognized_tokens, token, token_index)
# concepts_only &= token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE)
# i += 1
#
# # finish processing if needed
# nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
#
# else:
# nodes_found = core.utils.product(nodes_found, [node])
#
# ret = []
# for choice in nodes_found:
# ret.append(
# sheerka.ret(
# self.name,
# concepts_only,
# sheerka.new(
# BuiltinConcepts.PARSER_RESULT,
# parser=self,
# source=parser_input.source,
# body=choice,
# try_parsed=None))
# )
#
# if len(ret) == 1:
# self.log_result(context, parser_input.source, ret[0])
# return ret[0]
# else:
# self.log_multiple_results(context, parser_input.source, ret)
# return ret
#
# @staticmethod
# def get_concepts_nodes(context, index, token):
# """
# Tries to recognize a concept
# from the universe of all known concepts
# """
#
# if token.type != TokenKind.IDENTIFIER:
# return None
#
# concept = context.new_concept(token.value)
# if hasattr(concept, "__iter__") or context.sheerka.is_known(concept):
# concepts = concept if hasattr(concept, "__iter__") else [concept]
# concepts_nodes = [ConceptNode(c, index, index, [token], token.value) for c in concepts]
# return concepts_nodes
#
# return None
#
# @staticmethod
# def get_source_code_node(context, index, tokens):
# """
# Tries to recognize source code.
# For the time being, only Python is supported
# :param context:
# :param tokens:
# :param index:
# :return:
# """
#
# if len(tokens) == 0 or (len(tokens) == 1 and tokens[0].type == TokenKind.EOF):
# return None
#
# end_index = len(tokens)
# while end_index > 0:
# parser = PythonParser()
# tokens_to_parse = tokens[:end_index]
# res = parser.parse(context, tokens_to_parse)
# if res.status:
# # only expression are accepted
# ast_ = res.value.value.ast_
# if not isinstance(ast_, ast.Expression):
# return None
# try:
# compiled = compile(ast_, "<string>", "eval")
# eval(compiled, {}, {})
# except Exception:
# return None
#
# source = BaseParser.get_text_from_tokens(tokens_to_parse)
# return SourceCodeNode(res.value.value, index, index + end_index - 1, tokens_to_parse, source)
# end_index -= 1
#
# return None
+2 -2
View File
@@ -12,10 +12,10 @@ from core.concept import Concept, ConceptParts, DEFINITION_TYPE_DEF
("foo", ["foo"], "foo"),
("foo a", ["foo"], "__var__0 a"),
("foo a b", ["a"], "foo __var__0 b"),
("'foo'", [], "foo"),
("'foo'", [], "'foo'"),
("my name is a", ["a"], "my name is __var__0"),
("a b c d", ["b", "c"], "a __var__0 __var__1 d"),
("a 'b c' d", ["b", "c"], "a b c d"),
("a 'b c' d", ["b", "c"], "a 'b c' d"),
("a | b", ["a", "b"], "__var__0 | __var__1"),
("a b a c", ["a", "b"], "__var__0 __var__1 __var__0 c"),
("a b a c", ["b", "a"], "__var__1 __var__0 __var__1 c"),
+16 -2
View File
@@ -4,7 +4,7 @@ from core.tokenizer import Tokenizer, Token, TokenKind, LexerError, Keywords
def test_i_can_tokenize():
source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\"=|&<>c:name:"
source += "$£€!_identifier°~_^\\`==#"
source += "$£€!_identifier°~_^\\`==#__var__10"
tokens = list(Tokenizer(source))
assert tokens[0] == Token(TokenKind.PLUS, "+", 0, 1, 1)
assert tokens[1] == Token(TokenKind.STAR, "*", 1, 1, 2)
@@ -54,8 +54,9 @@ def test_i_can_tokenize():
assert tokens[45] == Token(TokenKind.BACK_QUOTE, '`', 108, 6, 50)
assert tokens[46] == Token(TokenKind.EQUALSEQUALS, '==', 109, 6, 51)
assert tokens[47] == Token(TokenKind.HASH, '#', 111, 6, 53)
assert tokens[48] == Token(TokenKind.VAR_DEF, '__var__10', 112, 6, 54)
assert tokens[48] == Token(TokenKind.EOF, '', 112, 6, 54)
assert tokens[49] == Token(TokenKind.EOF, '', 121, 6, 63)
@pytest.mark.parametrize("text, expected", [
@@ -88,6 +89,19 @@ def test_i_can_parse_word(text):
assert tokens[1].index == len(text)
@pytest.mark.parametrize("text", [
    "__var__0",
    "__var__1",
    "__var__10",
    "__var__999",
])
def test_i_can_parse_var_def(text):
    """Check that a bare ``__var__<n>`` placeholder is lexed as one VAR_DEF token.

    The tokenizer must yield exactly two tokens for such input; only the
    first one is inspected here (the second is presumably the trailing
    EOF marker -- not asserted by this test).
    """
    produced = list(Tokenizer(text))
    assert len(produced) == 2

    # The placeholder must come back as a single token carrying the full text.
    var_def = produced[0]
    assert var_def.type == TokenKind.VAR_DEF
    assert var_def.value == text
@pytest.mark.parametrize("text, message, error_text, index, line, column", [
("'string", "Missing Trailing quote", "'string", 7, 1, 8),
('"string', "Missing Trailing quote", '"string', 7, 1, 8),
+2 -2
View File
@@ -36,9 +36,9 @@ def compute_debug_array(res):
if token.type == TokenKind.WHITESPACE:
continue
else:
res_debug.append(token.value)
res_debug.append("T(" + token.value + ")")
else:
res_debug.append(token.concept.name)
res_debug.append("C(" + token.concept.name + ")")
to_compare.append(res_debug)
return to_compare
+2 -2
View File
@@ -218,8 +218,8 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("text, expected", [
("hello foo bar",
[
(True, [CNC("hello1", source="hello foo ", a=" foo "), "bar"]),
(True, [CNC("hello2", source="hello foo ", b=" foo "), "bar"]),
(True, [CNC("hello1", source="hello foo ", a="foo "), "bar"]),
(True, [CNC("hello2", source="hello foo ", b="foo "), "bar"]),
]),
])
def test_i_can_parse_when_unrecognized_yield_multiple_values(self, text, expected):
File diff suppressed because it is too large Load Diff
@@ -1,193 +0,0 @@
# import ast
#
# import pytest
#
# from core.builtin_concepts import ParserResultConcept, ReturnValueConcept, BuiltinConcepts
# from core.concept import Concept
# from core.tokenizer import Token, TokenKind, Tokenizer
# from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode
# from parsers.ConceptsWithConceptsParser import ConceptsWithConceptsParser
# from parsers.MultipleConceptsParser import MultipleConceptsParser
# from parsers.PythonParser import PythonNode
#
# from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
#
# multiple_concepts_parser = MultipleConceptsParser()
#
#
# def ret_val(*args):
# result = []
# index = 0
# source = ""
# for item in args:
# if isinstance(item, Concept):
# tokens = [Token(TokenKind.IDENTIFIER, item.name, 0, 0, 0)]
# result.append(ConceptNode(item, index, index, tokens, item.name))
# index += 1
# source += item.name
# elif isinstance(item, PythonNode):
# tokens = list(Tokenizer(item.source))[:-1] # strip trailing EOF
# result.append(SourceCodeNode(item, index, index + len(tokens) - 1, tokens, item.source))
# index += len(tokens)
# source += item.source
# else:
# tokens = list(Tokenizer(item))[:-1] # strip trailing EOF
# result.append(UnrecognizedTokensNode(index, index + len(tokens) - 1, tokens))
# index += len(tokens)
# source += item
#
# return ReturnValueConcept(
# "who",
# False,
# ParserResultConcept(parser=multiple_concepts_parser, value=result, source=source))
#
#
# class TestConceptsWithConceptsParser(TestUsingMemoryBasedSheerka):
#
# def init(self, concepts, inputs):
# context = self.get_context()
# for concept in concepts:
# context.sheerka.create_new_concept(context, concept)
#
# return context, ret_val(*inputs)
#
# def execute(self, concepts, inputs):
# context, input_return_values = self.init(concepts, inputs)
#
# parser = ConceptsWithConceptsParser()
# result = parser.parse(context, input_return_values.body)
#
# wrapper = result.body
# return_value = result.body.body
#
# return context, parser, result, wrapper, return_value
#
# @pytest.mark.parametrize("text, interested", [
# ("not parser result", False),
# (ParserResultConcept(parser="not multiple_concepts_parser"), False),
# (ParserResultConcept(parser=multiple_concepts_parser, value=[UnrecognizedTokensNode(0, 0, [])]), True),
# ])
# def test_not_interested(self, text, interested):
# context = self.get_context()
#
# res = ConceptsWithConceptsParser().parse(context, text)
# if interested:
# assert res is not None
# else:
# assert res is None
#
# def test_i_can_parse_composition_of_concepts(self):
# foo = Concept("foo")
# bar = Concept("bar")
# plus = Concept("a plus b").def_var("a").def_var("b")
#
# context, parser, result, wrapper, return_value = self.execute([foo, bar, plus], [foo, " plus ", bar])
#
# assert result.status
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
# assert result.who == wrapper.parser.name
# assert wrapper.source == "foo plus bar"
# assert context.sheerka.isinstance(return_value, plus)
#
# assert return_value.compiled["a"] == foo
# assert return_value.compiled["b"] == bar
#
# # sanity check, I can evaluate the result
# evaluated = context.sheerka.evaluate_concept(self.get_context(context.sheerka, True), return_value)
# assert evaluated.key == return_value.key
# assert evaluated.get_prop("a") == foo.init_key()
# assert evaluated.get_prop("b") == bar.init_key()
#
# def test_i_can_parse_when_composition_of_source_code(self):
# plus = Concept("a plus b", body="a + b").def_var("a").def_var("b")
# left = PythonNode("1+1", ast.parse("1+1", mode="eval"))
# right = PythonNode("2+2", ast.parse("2+2", mode="eval"))
# context, parser, result, wrapper, return_value = self.execute([plus], [left, " plus ", right])
#
# assert result.status
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
# assert result.who == wrapper.parser.name
# assert wrapper.source == "1+1 plus 2+2"
# assert context.sheerka.isinstance(return_value, plus)
#
# left_parser_result = ParserResultConcept(parser=parser, source="1+1", value=left)
# right_parser_result = ParserResultConcept(parser=parser, source="2+2", value=right)
# assert return_value.compiled["a"] == [ReturnValueConcept(parser.name, True, left_parser_result)]
# assert return_value.compiled["b"] == [ReturnValueConcept(parser.name, True, right_parser_result)]
#
# # sanity check, I can evaluate the result
# evaluated = context.sheerka.evaluate_concept(self.get_context(context.sheerka, True), return_value)
# assert evaluated.key == return_value.key
# assert evaluated.get_prop("a") == 2
# assert evaluated.get_prop("b") == 4
# assert evaluated.body == 6
#
# def test_i_can_parse_when_mix_of_concept_and_code(self):
# plus = Concept("a plus b").def_var("a").def_var("b")
# code = PythonNode("1+1", ast.parse("1+1", mode="eval"))
# foo = Concept("foo")
# context, parser, result, wrapper, return_value = self.execute([plus, foo], [foo, " plus ", code])
#
# assert result.status
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
# assert result.who == wrapper.parser.name
# assert wrapper.source == "foo plus 1+1"
# assert context.sheerka.isinstance(return_value, plus)
#
# code_parser_result = ParserResultConcept(parser=parser, source="1+1", value=code)
# assert return_value.compiled["a"] == foo
# assert return_value.compiled["b"] == [ReturnValueConcept(parser.name, True, code_parser_result)]
#
# # sanity check, I can evaluate the result
# evaluated = context.sheerka.evaluate_concept(self.get_context(context.sheerka, True), return_value)
# assert evaluated.key == return_value.key
# assert evaluated.get_prop("a") == foo.init_key()
# assert evaluated.get_prop("b") == 2
#
# def test_i_can_parse_when_multiple_concepts_are_recognized(self):
# foo = Concept("foo")
# bar = Concept("bar")
# plus_1 = Concept("a plus b", body="body1").def_var("a").def_var("b")
# plus_2 = Concept("a plus b", body="body2").def_var("a").def_var("b")
#
# context, input_return_values = self.init([foo, bar, plus_1, plus_2], [foo, " plus ", bar])
# parser = ConceptsWithConceptsParser()
# result = parser.parse(context, input_return_values.body)
#
# assert len(result) == 2
#
# res = result[0]
# wrapper = res.value
# return_value = res.value.value
# assert res.status
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
# assert res.who == wrapper.parser.name
# assert wrapper.source == "foo plus bar"
# assert context.sheerka.isinstance(return_value, plus_1)
# assert return_value.compiled["a"] == foo
# assert return_value.compiled["b"] == bar
#
# res = result[1]
# wrapper = res.value
# return_value = res.value.value
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
# assert res.who == wrapper.parser.name
# assert wrapper.source == "foo plus bar"
# assert context.sheerka.isinstance(return_value, plus_2)
# assert return_value.compiled["a"] == foo
# assert return_value.compiled["b"] == bar
#
# def test_i_cannot_parse_when_unknown_concept(self):
# foo = Concept("foo")
# bar = Concept("bar")
#
# context, input_return_values = self.init([foo, bar], [foo, " plus ", bar])
# parser = ConceptsWithConceptsParser()
# result = parser.parse(context, input_return_values.body)
# wrapper = result.body
# return_value = result.body.body
#
# assert not result.status
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.NOT_FOR_ME)
# assert result.who == parser.name
# assert return_value == input_return_values.body.body
@@ -1,216 +0,0 @@
# import pytest
#
# from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
# from core.concept import Concept
# from core.tokenizer import Tokenizer, TokenKind, Token
# from parsers.BaseNodeParser import cnode, scnode, utnode, SourceCodeNode, ConceptNode
# from parsers.BnfNodeParser import BnfNodeParser, Sequence
# from parsers.MultipleConceptsParser import MultipleConceptsParser
# from parsers.PythonParser import PythonNode
#
# from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
#
#
# def get_return_value(context, grammar, expression):
# parser = BnfNodeParser()
# parser.initialize(context, grammar)
#
# ret_val = parser.parse(context, expression)
# assert not ret_val.status
# return ret_val
#
#
# class TestMultipleConceptsParser(TestUsingMemoryBasedSheerka):
#
# def init(self, concepts, grammar, expression):
# context = self.get_context()
# for c in concepts:
# context.sheerka.create_new_concept(context, c)
# return_value = get_return_value(context, grammar, expression)
#
# return context, return_value
#
# def test_not_interested_if_not_parser_result(self):
# context = self.get_context()
# text = "not parser result"
#
# res = MultipleConceptsParser().parse(context, text)
# assert res is None
#
# def test_not_interested_if_not_from_concept_lexer_parser(self):
# context = self.get_context()
# text = ParserResultConcept(parser="not concept lexer", value="some value")
#
# res = MultipleConceptsParser().parse(context, text)
# assert res is None
#
# def test_i_can_parse_exact_concepts(self):
# foo = Concept("foo", body="'foo'")
# bar = Concept("bar", body="'bar'")
# baz = Concept("baz", body="'baz'")
# grammar = {}
# context, return_value = self.init([foo, bar, baz], grammar, "bar foo baz")
#
# parser = MultipleConceptsParser()
# ret_val = parser.parse(context, return_value.body)
#
# assert ret_val.status
# assert ret_val.who == parser.name
# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
# assert ret_val.value.value == [
# ConceptNode(bar, 0, 0, source="bar"),
# ConceptNode(foo, 2, 2, source="foo"),
# ConceptNode(baz, 4, 4, source="baz")]
# assert ret_val.value.source == "bar foo baz"
#
# def test_i_can_parse_when_ending_with_bnf(self):
# foo = Concept("foo", body="'foo'")
# bar = Concept("bar", body="'bar'")
# grammar = {foo: Sequence("foo1", "foo2", "foo3")}
# context, return_value = self.init([foo, bar], grammar, "bar foo1 foo2 foo3")
#
# parser = MultipleConceptsParser()
# ret_val = parser.parse(context, return_value.body)
#
# assert ret_val.status
# assert ret_val.who == parser.name
# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
# assert ret_val.value.value == [cnode("bar", 0, 0, "bar"), cnode("foo", 2, 6, "foo1 foo2 foo3")]
# assert ret_val.value.source == "bar foo1 foo2 foo3"
#
# def test_i_can_parse_when_starting_with_bnf(self):
# foo = Concept("foo", body="'foo'")
# bar = Concept("bar", body="'bar'")
# grammar = {foo: Sequence("foo1", "foo2", "foo3")}
# context, return_value = self.init([foo, bar], grammar, "foo1 foo2 foo3 bar")
#
# parser = MultipleConceptsParser()
# ret_val = parser.parse(context, return_value.body)
#
# assert ret_val.status
# assert ret_val.who == parser.name
# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
# assert ret_val.value.value == [cnode("foo", 0, 4, "foo1 foo2 foo3"), cnode("bar", 6, 6, "bar")]
# assert ret_val.value.source == "foo1 foo2 foo3 bar"
#
# def test_i_can_parse_when_concept_are_mixed(self):
# foo = Concept("foo")
# bar = Concept("bar")
# baz = Concept("baz")
# grammar = {foo: Sequence("foo1", "foo2", "foo3")}
# context, return_value = self.init([foo, bar, baz], grammar, "baz foo1 foo2 foo3 bar")
#
# parser = MultipleConceptsParser()
# ret_val = parser.parse(context, return_value.body)
#
# assert ret_val.status
# assert ret_val.who == parser.name
# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
# assert ret_val.value.value == [
# cnode("baz", 0, 0, "baz"),
# cnode("foo", 2, 6, "foo1 foo2 foo3"),
# cnode("bar", 8, 8, "bar")]
# assert ret_val.value.source == "baz foo1 foo2 foo3 bar"
#
# def test_i_can_parse_when_multiple_concepts_are_matching(self):
# foo = Concept("foo")
# bar = Concept("bar", body="bar1")
# baz = Concept("bar", body="bar2")
# grammar = {foo: "foo"}
# context, return_value = self.init([foo, bar, baz], grammar, "foo bar")
#
# parser = MultipleConceptsParser()
# ret_val = parser.parse(context, return_value.body)
#
# assert len(ret_val) == 2
# assert ret_val[0].status
# assert ret_val[0].value.value == [cnode("foo", 0, 0, "foo"), cnode("bar", 2, 2, "bar")]
# assert ret_val[0].value.source == "foo bar"
# assert ret_val[0].value.value[1].concept.metadata.body == "bar1"
#
# assert ret_val[1].status
# assert ret_val[1].value.value == [cnode("foo", 0, 0, "foo"), cnode("bar", 2, 2, "bar")]
# assert ret_val[1].value.source == "foo bar"
# assert ret_val[1].value.value[1].concept.metadata.body == "bar2"
#
# def test_i_can_parse_when_source_code(self):
# foo = Concept("foo")
# grammar = {foo: "foo"}
# context, return_value = self.init([foo], grammar, "1 foo")
#
# parser = MultipleConceptsParser()
# ret_val = parser.parse(context, return_value.body)
# wrapper = ret_val.value
# value = ret_val.value.value
#
# assert ret_val.status
# assert ret_val.who == parser.name
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
# assert wrapper.source == "1 foo"
# assert value == [
# scnode(0, 1, "1 "),
# cnode("foo", 2, 2, "foo")]
#
# def test_i_cannot_parse_when_unrecognized_token(self):
# twenty_two = Concept("twenty two")
# one = Concept("one")
# grammar = {twenty_two: Sequence("twenty", "two")}
# context, return_value = self.init([twenty_two, one], grammar, "twenty two + one")
#
# parser = MultipleConceptsParser()
# ret_val = parser.parse(context, return_value.body)
#
# assert not ret_val.status
# assert ret_val.who == parser.name
# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
# assert ret_val.value.value == [
# cnode("twenty two", 0, 2, "twenty two"),
# utnode(3, 5, " + "),
# cnode("one", 6, 6, "one")
# ]
# assert ret_val.value.source == "twenty two + one"
#
# def test_i_cannot_parse_when_unknown_concepts(self):
# twenty_two = Concept("twenty two")
# one = Concept("one")
# grammar = {twenty_two: Sequence("twenty", "two")}
# context, return_value = self.init([twenty_two, one], grammar, "twenty two plus one")
#
# parser = MultipleConceptsParser()
# ret_val = parser.parse(context, return_value.body)
#
# assert not ret_val.status
# assert ret_val.who == parser.name
# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
# assert ret_val.value.value == [
# cnode("twenty two", 0, 2, "twenty two"),
# utnode(3, 5, " plus "),
# cnode("one", 6, 6, "one")
# ]
# assert ret_val.value.source == "twenty two plus one"
#
# @pytest.mark.parametrize("text, expected_source, expected_end", [
# ("True", "True", 0),
# ("1 == 1", "1 == 1", 4),
# ("1!xdf", "1", 0),
# ("1", "1", 0),
# ])
# def test_i_can_get_source_code_node(self, text, expected_source, expected_end):
# tokens = list(Tokenizer(text))[:-1] # strip trailing EOF
#
# start_index = 5 # an arbitrary non-zero value
# res = MultipleConceptsParser().get_source_code_node(self.get_context(), start_index, tokens)
#
# assert isinstance(res, SourceCodeNode)
# assert isinstance(res.node, PythonNode)
# assert res.source == expected_source
# assert res.start == start_index
# assert res.end == start_index + expected_end
#
# def test_i_cannot_parse_null_text(self):
# res = MultipleConceptsParser().get_source_code_node(self.get_context(), 0, [])
# assert res is None
#
# eof = Token(TokenKind.EOF, "", 0, 0, 0)
# res = MultipleConceptsParser().get_source_code_node(self.get_context(), 0, [eof])
# assert res is None
+204 -327
View File
@@ -31,6 +31,7 @@ cmap = {
"if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c"),
"square": Concept("square(a)").def_var("a"),
"foo bar": Concept("foo bar(a)").def_var("a"),
"long infixed": Concept("a long infixed b").def_var("a").def_var("b"),
"twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
}
@@ -50,8 +51,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
TestSyaNodeParser.sheerka.force_sya_def(context, [
(cmap["plus"].id, 5, SyaAssociativity.Right),
(cmap["mult"].id, 10, SyaAssociativity.Right),
(cmap["minus"].id, 10, SyaAssociativity.Right),
(cmap["square"].id, None, SyaAssociativity.No)])
(cmap["minus"].id, 10, SyaAssociativity.Right)])
def init_parser(self,
my_concepts_map=None,
@@ -98,99 +98,92 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("expression, expected_sequences", [
("one plus two", [["one", "two", "plus"]]),
("1 + 1 plus two", [["1 + 1 ", "two", "plus"]]),
("1 + 1 plus two", [["1 + 1", "two", "plus"]]),
("one + two plus three", [
["one", " + ", "two", "three", "plus"],
["one + two ", "three", "plus"]]),
["one + two", "three", "plus"]]),
("twenty one plus two", [
["twenty ", "one", "two", "plus"],
[short_cnode("twenties", "twenty one"), "two", "plus"]
]),
("x$!# plus two", [["x$!# ", "two", "plus"]]),
("x$!# plus two", [["x$!#", "two", "plus"]]),
("one plus 1 + 1", [["one", " 1 + 1", "plus"]]),
("1 + 1 plus 2 + 2", [["1 + 1 ", " 2 + 2", "plus"]]),
("one plus 1 + 1", [["one", "1 + 1", "plus"]]),
("1 + 1 plus 2 + 2", [["1 + 1", "2 + 2", "plus"]]),
("one + two plus 1 + 1", [
["one", " + ", "two", " 1 + 1", "plus"],
["one + two ", " 1 + 1", "plus"]
["one", " + ", "two", "1 + 1", "plus"],
["one + two", "1 + 1", "plus"]
]),
("twenty one plus 1 + 1", [
["twenty ", "one", " 1 + 1", "plus"],
[cnode("twenties", 0, 2, "twenty one"), " 1 + 1", "plus"]
["twenty ", "one", "1 + 1", "plus"],
[cnode("twenties", 0, 2, "twenty one"), "1 + 1", "plus"]
]),
("x$!# plus 1 + 1", [["x$!# ", " 1 + 1", "plus"]]),
("x$!# plus 1 + 1", [["x$!#", "1 + 1", "plus"]]),
("one plus two + three", [
["one", "two", "plus", " + ", "three"],
["one", " two + three", "plus"],
["one", "two + three", "plus"],
]),
("1 + 1 plus two + three", [
["1 + 1 ", "two", "plus", (" + ", 1), "three"],
["1 + 1 ", " two + three", "plus"],
["1 + 1", "two", "plus", (" + ", 1), "three"],
["1 + 1", "two + three", "plus"],
]),
("one + two plus two + three", [
["one", " + ", "two", ("two", 1), "plus", (" + ", 1), "three"],
["one + two ", ("two", 1), "plus", (" + ", 1), "three"],
["one", " + ", "two", " two + three", "plus"],
["one + two ", " two + three", "plus"],
["one + two", ("two", 1), "plus", (" + ", 1), "three"],
["one", " + ", "two", "two + three", "plus"],
["one + two", "two + three", "plus"],
]),
("twenty one plus two + three", [
["twenty ", "one", "two", "plus", " + ", "three"],
[cnode("twenties", 0, 2, "twenty one"), "two", "plus", " + ", "three"],
["twenty ", "one", " two + three", "plus"],
[cnode("twenties", 0, 2, "twenty one"), " two + three", "plus"],
["twenty ", "one", "two + three", "plus"],
[cnode("twenties", 0, 2, "twenty one"), "two + three", "plus"],
]),
("x$!# plus two + three", [
["x$!# ", "two", "plus", " + ", "three"],
["x$!# ", " two + three", "plus"],
["x$!#", "two", "plus", " + ", "three"],
["x$!#", "two + three", "plus"],
]),
("one plus twenty two", [
["one", " twenty ", "plus", "two"],
["one", "twenty ", "plus", "two"],
["one", cnode("twenties", 4, 6, "twenty two"), "plus"],
]),
("1 + 1 plus twenty one", [
["1 + 1 ", " twenty ", "plus", "one"],
["1 + 1 ", cnode("twenties", 8, 10, "twenty one"), "plus"],
["1 + 1", "twenty ", "plus", "one"],
["1 + 1", cnode("twenties", 8, 10, "twenty one"), "plus"],
]),
("one + two plus twenty one", [
["one", " + ", "two", " twenty ", "plus", ("one", 1)],
["one + two ", " twenty ", "plus", ("one", 1)],
["one", " + ", "two", "twenty ", "plus", ("one", 1)],
["one + two", "twenty ", "plus", ("one", 1)],
["one", " + ", "two", cnode("twenties", 8, 10, "twenty one"), "plus"],
["one + two ", cnode("twenties", 8, 10, "twenty one"), "plus"],
["one + two", cnode("twenties", 8, 10, "twenty one"), "plus"],
]),
("twenty one plus twenty two",
[
["twenty ", "one", " twenty ", "plus", "two"],
[cnode("twenties", 0, 2, "twenty one"), " twenty ", "plus", "two"],
["twenty ", "one", ("twenty ", 1), "plus", "two"],
[cnode("twenties", 0, 2, "twenty one"), ("twenty ", 1), "plus", "two"],
["twenty ", "one", cnode("twenties", 6, 8, "twenty two"), "plus"],
[cnode("twenties", 0, 2, "twenty one"), cnode("twenties", 6, 8, "twenty two"), "plus"],
]),
("x$!# plus twenty two", [
["x$!# ", " twenty ", "plus", "two"],
["x$!# ", cnode("twenties", 7, 9, "twenty two"), "plus"]
["x$!#", "twenty ", "plus", "two"],
["x$!#", cnode("twenties", 7, 9, "twenty two"), "plus"]
]),
("one plus z$!#", [["one", " z$!#", "plus"]]),
("1 + 1 plus z$!#", [["1 + 1 ", " z$!#", "plus"]]),
("one plus z$!#", [["one", "z$!#", "plus"]]),
("1 + 1 plus z$!#", [["1 + 1", "z$!#", "plus"]]),
("one + two plus z$!#", [
["one", " + ", "two", " z$!#", "plus"],
["one + two ", " z$!#", "plus"],
["one", " + ", "two", "z$!#", "plus"],
["one + two", "z$!#", "plus"],
]),
("twenty one plus z$!#", [
["twenty ", "one", " z$!#", "plus"],
[cnode("twenties", 0, 2, "twenty one"), " z$!#", "plus"],
["twenty ", "one", "z$!#", "plus"],
[cnode("twenties", 0, 2, "twenty one"), "z$!#", "plus"],
]),
("x$!# plus z$!#", [["x$!# ", " z$!#", "plus"]]),
("x$!# plus z$!#", [["x$!#", "z$!#", "plus"]]),
])
def test_i_can_post_fix_simple_infix_concepts(self, expression, expected_sequences):
# concepts_map = {
# "plus": Concept("a plus b").def_var("a").def_var("b"),
# "one": Concept("one"),
# "two": Concept("two"),
# "three": Concept("three"),
# "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
# }
sheerka, context, parser = self.init_parser()
res = parser.infix_to_postfix(context, expression)
@@ -202,10 +195,10 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
assert res_i.out == expected_array
@pytest.mark.parametrize("expression, expected_sequences", [
("one plus plus plus 1 + 1", [["one", " 1 + 1", "plus plus plus"]]),
("one plus plus plus 1 + 1", [["one", "1 + 1", "plus plus plus"]]),
("x$!# another long name infix twenty two", [
["x$!# ", " twenty ", "another long name infix", "two"],
["x$!# ", cnode("twenties", 13, 15, "twenty two"), "another long name infix"],
["x$!#", "twenty ", "another long name infix", "two"],
["x$!#", cnode("twenties", 13, 15, "twenty two"), "another long name infix"],
]),
])
def test_i_can_post_fix_infix_concepts_with_long_name(self, expression, expected_sequences):
@@ -229,24 +222,18 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("expression, expected_sequences", [
("one prefixed", [["one", "prefixed"]]),
("1 + 1 prefixed", [["1 + 1 ", "prefixed"]]),
("1 + 1 prefixed", [["1 + 1", "prefixed"]]),
("one + two prefixed", [
["one", " + ", "two", "prefixed"],
["one + two ", "prefixed"],
["one + two", "prefixed"],
]),
("twenty one prefixed", [
["twenty ", "one", "prefixed"],
[cnode("twenties", 0, 2, "twenty one"), "prefixed"],
]),
("x$!# prefixed", [["x$!# ", "prefixed"]]),
("x$!# prefixed", [["x$!#", "prefixed"]]),
])
def test_i_can_post_fix_simple_prefixed_concepts(self, expression, expected_sequences):
# concepts_map = {
# "prefixed": Concept("a prefixed").def_var("a"),
# "one": Concept("one"),
# "two": Concept("two"),
# "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
# }
sheerka, context, parser = self.init_parser()
res = parser.infix_to_postfix(context, expression)
@@ -259,28 +246,28 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("expression, expected_sequences", [
("one prefixed prefixed", [["one", "prefixed prefixed"]]),
("1 + 1 prefixed prefixed", [["1 + 1 ", "prefixed prefixed"]]),
("1 + 1 prefixed prefixed", [["1 + 1", "prefixed prefixed"]]),
("one + two prefixed prefixed", [
["one", " + ", "two", "prefixed prefixed"],
["one + two ", "prefixed prefixed"],
["one + two", "prefixed prefixed"],
]),
("twenty one prefixed prefixed", [
["twenty ", "one", "prefixed prefixed"],
[cnode("twenties", 0, 2, "twenty one"), "prefixed prefixed"],
]),
("x$!# prefixed prefixed", [["x$!# ", "prefixed prefixed"]]),
("x$!# prefixed prefixed", [["x$!#", "prefixed prefixed"]]),
("one long name prefixed", [["one", "long name prefixed"]]),
("1 + 1 long name prefixed", [["1 + 1 ", "long name prefixed"]]),
("1 + 1 long name prefixed", [["1 + 1", "long name prefixed"]]),
("one + two long name prefixed", [
["one", " + ", "two", "long name prefixed"],
["one + two ", "long name prefixed"],
["one + two", "long name prefixed"],
]),
("twenty one long name prefixed", [
["twenty ", "one", "long name prefixed"],
[cnode("twenties", 0, 2, "twenty one"), "long name prefixed"],
]),
("x$!# long name prefixed", [["x$!# ", "long name prefixed"]]),
("x$!# long name prefixed", [["x$!#", "long name prefixed"]]),
])
def test_i_can_post_fix_prefixed_concepts_with_long_names(self, expression, expected_sequences):
concepts_map = {
@@ -302,24 +289,18 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("expression, expected_sequences", [
("suffixed one", [["one", "suffixed"]]),
("suffixed 1 + 1", [[" 1 + 1", "suffixed"]]),
("suffixed 1 + 1", [["1 + 1", "suffixed"]]),
("suffixed one + two", [
["one", "suffixed", " + ", "two"],
[" one + two", "suffixed"],
["one + two", "suffixed"],
]),
("suffixed twenty one", [
[" twenty ", "suffixed", "one"],
["twenty ", "suffixed", "one"],
[cnode("twenties", 2, 4, "twenty one"), "suffixed"],
]),
("suffixed x$!#", [[" x$!#", "suffixed"]]),
("suffixed x$!#", [["x$!#", "suffixed"]]),
])
def test_i_can_post_fix_simple_suffixed_concepts(self, expression, expected_sequences):
# concepts_map = {
# "suffixed": Concept("suffixed a").def_var("a"),
# "one": Concept("one"),
# "two": Concept("two"),
# "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
# }
sheerka, context, parser = self.init_parser()
res = parser.infix_to_postfix(context, expression)
@@ -351,26 +332,27 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("expression, expected_sequences", [
("one ? two : three", [["one", "two", "three", "?"]]),
("one ? baz qux : two", [["one", "baz qux", "two", "?"]]),
("1+1 ? one + two : twenty one", [
["1+1 ", "one", " + ", "two"], # an error is detected
["1+1 ", " one + two ", " twenty ", "?", ("one", 1)],
["1+1 ", " one + two ", short_cnode("twenties", "twenty one"), "?"],
["1+1", "one", " + ", "two"], # error is detected so the parsing has stopped
["1+1", "one + two", "twenty ", "?", ("one", 1)],
["1+1", "one + two", short_cnode("twenties", "twenty one"), "?"],
]),
("x$!# ? y$!# : z$!#", [["x$!# ", " y$!# ", " z$!#", "?"]]),
("x$!# ? y$!# : z$!#", [["x$!#", "y$!#", "z$!#", "?"]]),
("if one then two else three end", [["one", "two", "three", "if"]]),
("if 1+1 then x$!# else twenty one end", [
[" 1+1 ", " x$!# ", " twenty ", "one"], # an error is detected
[" 1+1 ", " x$!# ", short_cnode("twenties", "twenty one"), "if"],
["1+1", "x$!#", "twenty ", "one"], # an error is detected
["1+1", "x$!#", short_cnode("twenties", "twenty one"), "if"],
]),
("if x$!# then one + two else z$!# end", [
[" x$!# ", "one", " + ", "two"], # an error is detected
[" x$!# ", " one + two ", " z$!# ", "if"],
["x$!#", "one", " + ", "two"], # error is detected so the parsing has stopped
["x$!#", "one + two", "z$!#", "if"],
]),
])
def test_i_can_post_fix_ternary_concepts(self, expression, expected_sequences):
"""
The purpose of this test is to validate concepts like
The purpose of this test is to validate concepts
that have at least 3 parameters separated by tokens
Example :
var_0 token var_1 token var_2
@@ -381,14 +363,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
:return:
"""
# concepts_map = {
# "?": Concept("a ? b : c").def_var("a").def_var("b").def_var("c"),
# "if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c"),
# "one": Concept("one"),
# "two": Concept("two"),
# "three": Concept("three"),
# "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
# }
sheerka, context, parser = self.init_parser()
res = parser.infix_to_postfix(context, expression)
@@ -402,15 +376,15 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("expression, expected_sequences", [
("one ? ? two : : three", [["one", "two", "three", "? ?"]]),
("1+1 ? ? one + two : : twenty one", [
["1+1 ", "one", " + ", "two"], # error
["1+1 ", " one + two ", " twenty ", "? ?", ("one", 1)],
["1+1 ", " one + two ", short_cnode("twenties", "twenty one"), "? ?"],
["1+1", "one", " + ", "two"], # error
["1+1", "one + two", "twenty ", "? ?", ("one", 1)],
["1+1", "one + two", short_cnode("twenties", "twenty one"), "? ?"],
]),
("if if one then then two else else three end end ", [["one", "two", "three", "if if"]]),
("if if 1+1 then then x$!# else else twenty one end end ", [
[" 1+1 ", " x$!# ", " twenty ", "one"], # error
[" 1+1 ", " x$!# ", short_cnode("twenties", "twenty one"), "if if"]]),
["1+1", "x$!#", "twenty ", "one"], # error
["1+1", "x$!#", short_cnode("twenties", "twenty one"), "if if"]]),
])
def test_i_can_post_fix_ternary_concept_with_long_names(self, expression, expected_sequences):
concepts_map = {
@@ -433,8 +407,8 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("expression, expected", [
("foo bar baz", ["baz", "bar", "foo"]),
("foo bar x$!#", [" x$!#", "bar", "foo"]),
("foo bar 1 + 1", [" 1 + 1", "bar", "foo"]),
("foo bar x$!#", ["x$!#", "bar", "foo"]),
("foo bar 1 + 1", ["1 + 1", "bar", "foo"]),
])
def test_i_can_post_fix_suffixed_unary_composition(self, expression, expected):
concepts_map = {
@@ -452,8 +426,8 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("expression, expected", [
("baz bar foo", ["baz", "bar", "foo"]),
("x$!# bar foo", ["x$!# ", "bar", "foo"]),
("1 + 1 bar foo", ["1 + 1 ", "bar", "foo"]),
("x$!# bar foo", ["x$!#", "bar", "foo"]),
("1 + 1 bar foo", ["1 + 1", "bar", "foo"]),
])
def test_i_can_post_fix_prefixed_unary_composition(self, expression, expected):
concepts_map = {
@@ -480,17 +454,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
("one mult (two plus three)", ["one", "two", "three", "plus", "mult"]),
])
def test_i_can_post_fix_binary_with_precedence(self, expression, expected):
# concepts_map = {
# "plus": Concept("a plus b").def_var("a").def_var("b"),
# "mult": Concept("a mult b").def_var("a").def_var("b"),
# "one": Concept("one"),
# "two": Concept("two"),
# "three": Concept("three"),
# }
# sya_def = {
# concepts_map["plus"]: (5, SyaAssociativity.Right),
# concepts_map["mult"]: (10, SyaAssociativity.Right), # precedence greater than plus
# }
sheerka, context, parser = self.init_parser()
res = parser.infix_to_postfix(context, expression)
@@ -566,7 +529,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
}
sya_def = {
concepts_map["plus"]: (None, SyaAssociativity.Left),
concepts_map["plus"]: (1, SyaAssociativity.Left),
}
sheerka, context, parser = self.init_parser(concepts_map, sya_def)
@@ -580,14 +543,14 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
assert res[0].out == expected_array
@pytest.mark.parametrize("expression, expected", [
("x$!# ? y$!# : z$!# ? two : three", ["x$!# ", " y$!# ", " z$!# ", "two", "three", ("?", 1), "?"]),
("x$!# ? y$!# : (z$!# ? two : three)", ["x$!# ", " y$!# ", "z$!# ", "two", "three", ("?", 1), "?"]),
("x$!# ? y$!# : z$!# ? two : three", ["x$!#", "y$!#", "z$!#", "two", "three", ("?", 1), "?"]),
("x$!# ? y$!# : (z$!# ? two : three)", ["x$!#", "y$!#", "z$!#", "two", "three", ("?", 1), "?"]),
("one ? x$!# ? y$!# : z$!# : three", ["one", " x$!# ", " y$!# ", " z$!# ", ("?", 1), "three", "?"]),
("one ? (x$!# ? y$!# : z$!#) : three", ["one", "x$!# ", " y$!# ", " z$!#", ("?", 1), "three", "?"]),
("one ? x$!# ? y$!# : z$!# : three", ["one", "x$!#", "y$!#", "z$!#", ("?", 1), "three", "?"]),
("one ? (x$!# ? y$!# : z$!#) : three", ["one", "x$!#", "y$!#", "z$!#", ("?", 1), "three", "?"]),
("one ? two : x$!# ? y$!# : z$!#", ["one", "two", " x$!# ", " y$!# ", " z$!#", ("?", 1), "?"]),
("one ? two : (x$!# ? y$!# : z$!#)", ["one", "two", "x$!# ", " y$!# ", " z$!#", ("?", 1), "?"]),
("one ? two : x$!# ? y$!# : z$!#", ["one", "two", "x$!#", "y$!#", "z$!#", ("?", 1), "?"]),
("one ? two : (x$!# ? y$!# : z$!#)", ["one", "two", "x$!#", "y$!#", "z$!#", ("?", 1), "?"]),
])
def test_i_can_post_fix_right_associated_ternary(self, expression, expected):
concepts_map = {
@@ -607,14 +570,14 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
assert res[0].out == expected_array
@pytest.mark.parametrize("expression, expected", [
("x$!# ? y$!# : z$!# ? two : three", ["x$!# ", " y$!# ", " z$!# ", "?", "two", "three", ("?", 1)]),
("(x$!# ? y$!# : z$!#) ? two : three", ["x$!# ", " y$!# ", " z$!#", "?", "two", "three", ("?", 1)]),
("x$!# ? y$!# : z$!# ? two : three", ["x$!#", "y$!#", "z$!#", "?", "two", "three", ("?", 1)]),
("(x$!# ? y$!# : z$!#) ? two : three", ["x$!#", "y$!#", "z$!#", "?", "two", "three", ("?", 1)]),
# the following one is not possible when Left association
# ("one ? x$!# ? y$!# : z$!# : three", ["one", " x$!# ", " y$!# ", " z$!# ", ("?", 1), "three", "?"]),
# ("one ? x$!# ? y$!# : z$!# : three", ["one", "x$!#", "y$!#", "z$!#", ("?", 1), "three", "?"]),
("one ? two : x$!# ? y$!# : z$!#", ["one", "two", " x$!# ", "?", " y$!# ", " z$!#", ("?", 1)]),
("(one ? two : x$!#) ? y$!# : z$!#", ["one", "two", " x$!#", "?", " y$!# ", " z$!#", ("?", 1)]),
("one ? two : x$!# ? y$!# : z$!#", ["one", "two", "x$!#", "?", "y$!#", "z$!#", ("?", 1)]),
("(one ? two : x$!#) ? y$!# : z$!#", ["one", "two", "x$!#", "?", "y$!#", "z$!#", ("?", 1)]),
])
def test_i_can_post_fix_left_associated_ternary(self, expression, expected):
concepts_map = {
@@ -644,7 +607,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
expression = "foo bar baz"
res = parser.infix_to_postfix(context, expression)
expected_sequences = [
[UTN(" bar "), "foo", "baz"],
[UTN("bar "), "foo", "baz"],
["baz", "foo bar"]
]
@@ -669,9 +632,9 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
("square( one plus three ) minus two", ["one", "three", "plus", "square", "two", "minus"]),
("one minus square( two plus three ) ", ["one", "two", "three", "plus", "square", "minus"]),
("(one prefixed) prefixed)", ["one", "prefixed", ("prefixed", 1)]),
("( one prefixed ) prefixed)", ["one", "prefixed", ("prefixed", 1)]),
("( square( one ) prefixed ) prefixed)", ["one", "square", "prefixed", ("prefixed", 1)]),
("((one prefixed) prefixed)", ["one", "prefixed", ("prefixed", 1)]),
("( ( one prefixed ) prefixed)", ["one", "prefixed", ("prefixed", 1)]),
("( ( square( one ) prefixed ) prefixed)", ["one", "square", "prefixed", ("prefixed", 1)]),
("suffixed (suffixed one)", ["one", ("suffixed", 1), "suffixed"]),
("suffixed ( suffixed one) ", ["one", ("suffixed", 1), "suffixed"]),
@@ -681,32 +644,12 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
("one plus (two minus three)", ["one", "two", "three", "minus", "plus"]),
("one plus ( two minus three )", ["one", "two", "three", "minus", "plus"]),
("(one plus two) minus three", ["one", "two", "plus", "three", "minus"]),
("( one plus two ) minus three )", ["one", "two", "plus", "three", "minus"]),
("(( one plus two ) minus three )", ["one", "two", "plus", "three", "minus"]),
("foo bar (one)", ["one", "foo bar"]),
("foo bar(one)", ["one", "foo bar"]),
("foo bar ( one )", ["one", "foo bar"]),
])
def test_i_can_pos_fix_when_parenthesis(self, expression, expected):
# concepts_map = {
# "prefixed": Concept("a prefixed").def_var("a"),
# "suffixed": Concept("suffixed a").def_var("a"),
# "square": Concept("square(a)").def_var("a"),
# "foo bar": Concept("foo bar(a)").def_var("a"),
# "plus": Concept("a plus b").def_var("a").def_var("b"),
# "minus": Concept("a minus b").def_var("a").def_var("b"),
# "if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c"),
# "?": Concept("a ? b : c").def_var("a").def_var("b").def_var("c"),
# "one": Concept("one"),
# "two": Concept("two"),
# "three": Concept("three"),
# }
#
# sya_def = {
# concepts_map["square"]: (None, SyaAssociativity.No),
# concepts_map["plus"]: (10, SyaAssociativity.Right),
# concepts_map["minus"]: (10, SyaAssociativity.Right),
# }
sheerka, context, parser = self.init_parser()
res = parser.infix_to_postfix(context, expression)
@@ -721,14 +664,14 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
("function(one prefixed)", [[SCWC("function(", ")", "one", "prefixed")]]),
("function(if one then two else three end)", [[SCWC("function(", ")", "one", "two", "three", "if")]]),
("function(suffixed twenty two)", [
[SCWC("function(", ")", " twenty ", "suffixed", "two")],
[SCWC("function(", ")", "twenty ", "suffixed", "two")],
[SCWC("function(", ")", short_cnode("twenties", "twenty two"), "suffixed")]]),
("function(twenty two prefixed)", [
[SCWC("function(", ")", "twenty ", "two", "prefixed")],
[SCWC("function(", ")", short_cnode("twenties", "twenty two"), "prefixed")],
]),
("function(if one then twenty two else three end)", [
["')'", "one", " twenty ", "two"], # error
["')'", "one", "twenty ", "two"], # error
[SCWC("function(", ")", "one", short_cnode("twenties", "twenty two"), "three", "if")]
]),
("func1(func2(one two) three)", [
@@ -744,16 +687,16 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
]),
("f1(one plus two mult three) plus f2(suffixed x$!# prefixed)", [
[SCWC("f1(", ")", "one", "two", "three", "mult", "plus"),
SCWC(" f2(", (")", 1), " x$!# ", "prefixed", "suffixed"),
SCWC("f2(", (")", 1), "x$!#", "prefixed", "suffixed"),
("plus", 1)]
]),
# plus, suffixed, prefixed, ternary
("func1(one) plus func2(two)", [[SCWC("func1(", ")", "one"), SCWC(" func2(", (")", 1), "two"), "plus"]]),
("suffixed function(one)", [[SCWC(" function(", ")", "one"), "suffixed"]]),
("func1(one) plus func2(two)", [[SCWC("func1(", ")", "one"), SCWC("func2(", (")", 1), "two"), "plus"]]),
("suffixed function(one)", [[SCWC("function(", ")", "one"), "suffixed"]]),
("function(one) prefixed", [[SCWC("function(", ")", "one"), "prefixed"]]),
("if f1(one) then f2(two) else f3(three) end", [
[SCWC(" f1(", ")", "one"), SCWC(" f2(", (")", 1), "two"), SCWC(" f3(", (")", 2), "three"), "if"]]),
[SCWC("f1(", ")", "one"), SCWC("f2(", (")", 1), "two"), SCWC("f3(", (")", 2), "three"), "if"]]),
# Sequence
("if one then two else three end function(x$!#)", [
@@ -762,21 +705,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
("suffixed one function(two)", [["one", "suffixed", SCWC(" function(", ")", "two")]]),
])
def test_i_can_post_fix_when_parenthesis_and_unknown(self, expression, expected_sequences):
# concepts_map = {
# "prefixed": Concept("a prefixed").def_var("a"),
# "suffixed": Concept("suffixed a").def_var("a"),
# "plus": Concept("a plus b").def_var("a").def_var("b"),
# "mult": Concept("a mult b").def_var("a").def_var("b"),
# "if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c"),
# "one": Concept("one"),
# "two": Concept("two"),
# "three": Concept("three"),
# "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
# }
# sya_def = {
# concepts_map["plus"]: (5, SyaAssociativity.Right),
# concepts_map["mult"]: (10, SyaAssociativity.Right), # precedence greater than plus
# }
sheerka, context, parser = self.init_parser()
res = parser.infix_to_postfix(context, expression)
@@ -787,28 +715,22 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
assert res_i.out == expected_array
@pytest.mark.parametrize("expression, expected", [
("(", ("(", 0)),
("one plus ( 1 + ", ("(", 4)),
("one( 1 + ", ("(", 1)),
("one ( 1 + ", ("(", 2)),
("function( 1 + ", ("(", 1)),
("function ( 1 + ", ("(", 2)),
("one plus ) 1 + ", (")", 4)),
("one ) 1 + ", (")", 2)),
("function ) 1 + ", (")", 2)),
("one ? ( : two", ("(", 4)),
("one ? one plus ( : two", ("(", 8)),
("one ? ) : two", (")", 4)),
("one ? one plus ) : two", (")", 8)),
# ("(", ("(", 0)),
# ("one plus ( 1 + ", ("(", 4)),
# ("one( 1 + ", ("(", 1)),
# ("one ( 1 + ", ("(", 2)),
# ("function( 1 + ", ("(", 1)),
# ("function ( 1 + ", ("(", 2)),
# ("one plus ) 1 + ", (")", 4)),
# ("one ) 1 + ", (")", 2)),
# ("function ) 1 + ", (")", 2)),
# ("one ? ( : two", ("(", 4)),
# ("one ? one plus ( : two", ("(", 8)),
# ("one ? ) : two", (")", 4)),
# ("one ? one plus ) : two", (")", 8)),
("(one plus ( 1 + )", ("(", 0)),
])
def test_i_can_detect_parenthesis_mismatch_error_when_post_fixing(self, expression, expected):
# concepts_map = {
# "one": Concept("one"),
# "two": Concept("two"),
# "plus": Concept("a plus b").def_var("a").def_var("b"),
# "?": Concept("a ? b : c").def_var("a").def_var("b").def_var("c"),
# }
sheerka, context, parser = self.init_parser()
res = parser.infix_to_postfix(context, expression)
@@ -820,12 +742,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
("one ? one two : three", ("?", ":")),
])
def test_i_can_detected_when_too_many_parameters(self, expression, expected):
# concepts_map = {
# "one": Concept("one"),
# "two": Concept("two"),
# "plus": Concept("a plus b").def_var("a").def_var("b"),
# "?": Concept("a ? b : c").def_var("a").def_var("b").def_var("c"),
# }
sheerka, context, parser = self.init_parser(cmap, None)
res = parser.infix_to_postfix(context, expression)
@@ -850,27 +766,16 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
("one infix two three infix four", ["one", "two", "infix", "three", "four", ("infix", 1)]),
("one infix two three prefixed", ["one", "two", "infix", "three", "prefixed"]),
("one infix two suffixed three", ["one", "two", "infix", "three", "suffixed"]),
("one infix two x$!# ? y$!# : z$!#", ["one", "two", "infix", " x$!# ", " y$!# ", " z$!#", "?"]),
("one infix two x$!# ? y$!# : z$!#", ["one", "two", "infix", " x$!#", "y$!#", "z$!#", "?"]),
("one prefixed two infix three", ["one", "prefixed", "two", "three", "infix"]),
("one prefixed two prefixed", ["one", "prefixed", "two", ("prefixed", 1)]),
("one prefixed suffixed two", ["one", "prefixed", "two", "suffixed"]),
("one prefixed x$!# ? y$!# : z$!#", ["one", "prefixed", " x$!# ", " y$!# ", " z$!#", "?"]),
("one prefixed x$!# ? y$!# : z$!#", ["one", "prefixed", " x$!#", "y$!#", "z$!#", "?"]),
("(one infix two) (three prefixed)", ["one", "two", "infix", "three", "prefixed"]),
])
def test_i_can_post_fix_sequences(self, expression, expected):
# concepts_map = {
# "prefixed": Concept("a prefixed").def_var("a"),
# "suffixed": Concept("suffixed a").def_var("a"),
# "infix": Concept("a infix b").def_var("a").def_var("b"),
# "?": Concept("a ? b : c").def_var("a").def_var("b").def_var("c"),
# "one": Concept("one"),
# "two": Concept("two"),
# "three": Concept("three"),
# "four": Concept("four"),
# }
sheerka, context, parser = self.init_parser(cmap, None)
res = parser.infix_to_postfix(context, expression)
@@ -886,23 +791,49 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
"plus equals": Concept("a plus equals b").def_var("a").def_var("b"),
}
sheerka, context, parser = self.init_parser(concepts_map, None)
sya_def = {
concepts_map["plus"]: (1, SyaAssociativity.Right),
concepts_map["plus plus"]: (1, SyaAssociativity.Right),
concepts_map["plus equals"]: (1, SyaAssociativity.Right),
}
sheerka, context, parser = self.init_parser(concepts_map, sya_def)
expression = "a plus plus equals b"
res = parser.infix_to_postfix(context, expression)
expected_array = tests.parsers.parsers_utils.compute_debug_array(res)
assert expected_array == [
["a", "a plus b", "a plus b", "equals", "b"],
["a", "a plus b", "a plus plus", "equals", "b"],
["a", "a plus b", "a plus equals b", "equals", "b"],
["a", "a plus plus", "plus", "equals", "b"],
["a", "a plus plus", "plus", "equals", "b"],
["a", "a plus plus", "plus", "equals", "b"],
["a", "a plus equals b", "a plus b", "equals", "b"],
["a", "a plus equals b", "a plus plus", "equals", "b"],
["a", "a plus equals b", "a plus equals b", "equals", "b"],
["T(a)", "C(a plus b)", "C(a plus b)", "T(equals)", "T(b)"],
["T(a)", "C(a plus b)", "C(a plus plus)", "T(equals)", "T(b)"],
["T(a)", "C(a plus b)", "C(a plus equals b)", "T(equals)", "T(b)"],
["T(a)", "C(a plus plus)", "T(plus)", "T(equals)", "T(b)"],
["T(a)", "C(a plus plus)", "T(plus)", "T(equals)", "T(b)"],
["T(a)", "C(a plus plus)", "T(plus)", "T(equals)", "T(b)"],
["T(a)", "C(a plus equals b)", "C(a plus b)", "T(equals)", "T(b)"],
["T(a)", "C(a plus equals b)", "C(a plus plus)", "T(equals)", "T(b)"],
["T(a)", "C(a plus equals b)", "C(a plus equals b)", "T(equals)", "T(b)"],
]
def test_non_reg(self):
    """
    Non-regression smoke test for an expression that chains two infix concepts
    ('a plus b' followed by 'a complex infix b ').
    Nothing is asserted: the test passes as long as neither
    infix_to_postfix() nor parse() raises.
    :return:
    """
    infix_concepts = {
        "plus": Concept("a plus b").def_var("a").def_var("b"),
        "complex infix": Concept("a complex infix b ").def_var("a").def_var("b"),
    }
    # deliberately empty: no concept gets an explicit sya definition here
    empty_sya_def = {}
    sheerka, context, parser = self.init_parser(infix_concepts, empty_sya_def)

    source = "a plus complex infix b"
    # results are intentionally ignored, only the absence of exception matters
    parser.infix_to_postfix(context, source)
    parser.parse(context, source)
def test_i_can_use_string_instead_of_identifier(self):
concepts_map = {
"ternary": Concept("a ? ? b '::' c").def_var("a").def_var("b").def_var("c"),
@@ -945,13 +876,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
Not quite sure why this test is here
:return:
"""
# concepts_map = {
# "foo": Concept("foo a").def_var("a"),
# "one": Concept("one"),
# "two": Concept("two"),
# "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
# }
sheerka, context, parser = self.init_parser(cmap, None)
sheerka, context, parser = self.init_parser()
expression = "suffixed twenties"
res = parser.infix_to_postfix(context, expression)
@@ -962,17 +887,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
assert res[0].out == expected_array
def test_i_can_parse_when_concept_atom_only(self):
# concepts_map = {
# "plus": Concept("a plus b").def_var("a").def_var("b"),
# "mult": Concept("a mult b").def_var("a").def_var("b"),
# "one": Concept("one"),
# "two": Concept("two"),
# "three": Concept("three"),
# }
# sya_def = {
# concepts_map["plus"]: (5, SyaAssociativity.Right),
# concepts_map["mult"]: (10, SyaAssociativity.Right), # precedence greater than plus
# }
sheerka, context, parser = self.init_parser()
text = "one plus two mult three"
@@ -992,10 +906,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
assert expected_concept.compiled["b"].compiled["b"] == cmap["three"]
def test_i_can_parse_when_python_code(self):
# concepts_map = {
# "foo": Concept("foo a").def_var("a")
# }
sheerka, context, parser = self.init_parser(cmap, None)
sheerka, context, parser = self.init_parser()
text = "suffixed 1 + 1"
res = parser.parse(context, text)
@@ -1014,16 +925,10 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
assert sheerka.isinstance(return_value_a, BuiltinConcepts.RETURN_VALUE)
assert return_value_a.status
assert sheerka.isinstance(return_value_a.body, BuiltinConcepts.PARSER_RESULT)
assert return_value_a.body.source == " 1 + 1"
assert return_value_a.body.source == "1 + 1"
assert isinstance(return_value_a.body.body, PythonNode)
def test_i_can_parse_when_bnf_concept(self):
# concepts_map = {
# "foo": Concept("foo a").def_var("a"),
# "one": Concept("one"),
# "two": Concept("two"),
# "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
# }
sheerka, context, parser = self.init_parser()
text = "suffixed twenty one"
@@ -1043,13 +948,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
assert expected_concept.compiled["a"].compiled["unit"] == cmap["one"]
def test_i_can_parse_sequences(self):
# concepts_map = {
# "plus": Concept("a plus b").def_var("a").def_var("b"),
# "foo": Concept("foo a").def_var("a"),
# "one": Concept("one"),
# "two": Concept("two"),
# }
sheerka, context, parser = self.init_parser(cmap, None)
sheerka, context, parser = self.init_parser()
text = "one plus 1 + 1 suffixed two"
res = parser.parse(context, text)
@@ -1081,27 +980,12 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
("f1(one prefixed) plus f2(suffixed two)", True, [
CNC("plus",
a=SCWC("f1(", ")", CNC("prefixed", a="one")),
b=SCWC(" f2(", (")", 1), CNC("suffixed", a="two")))
b=SCWC("f2(", (")", 1), CNC("suffixed", a="two")))
]),
("function(suffixed x$!#)", False, [
SCWC("function(", ")", CNC("suffixed", 2, 7, a=" x$!#"))]),
SCWC("function(", ")", CNC("suffixed", 2, 7, a="x$!#"))]),
])
def test_i_can_parse_when_one_result(self, text, expected_status, expected_result):
# concepts_map = {
# "prefixed": Concept("a prefixed").def_var("a"),
# "suffixed": Concept("suffixed a").def_var("a"),
# "mult": Concept("a mult b").def_var("a").def_var("b"),
# "plus": Concept("a plus b").def_var("a").def_var("b"),
# "if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c"),
# "one": Concept("one"),
# "two": Concept("two"),
# "three": Concept("three"),
# "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
# }
# sya_def = {
# concepts_map["plus"]: (5, SyaAssociativity.Right),
# concepts_map["mult"]: (10, SyaAssociativity.Right), # precedence greater than plus
# }
sheerka, context, parser = self.init_parser()
res = parser.parse(context, text)
@@ -1113,41 +997,54 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert lexer_nodes == expected_array
# @pytest.mark.parametrize("text, list_of_expected", [
# ("1 plus twenty one", [
# (False, [CNC("plus", a=scnode(0, 0, "1"), b=UTN(" twenty ")), CN("one")]),
# (True, [CNC("plus", a=scnode(0, 0, "1"), b=CN("twenties", source="twenty one"))])
# ])
# ])
# def test_i_can_parse_when_multiple_results(self, text, list_of_expected):
# concepts_map = {
# "prefixed": Concept("a prefixed").def_var("a"),
# "suffixed": Concept("suffixed a").def_var("a"),
# "mult": Concept("a mult b").def_var("a").def_var("b"),
# "plus": Concept("a plus b").def_var("a").def_var("b"),
# "if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c"),
# "one": Concept("one"),
# "two": Concept("two"),
# "three": Concept("three"),
# "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
# }
# sya_def = {
# concepts_map["plus"]: (5, SyaAssociativity.Right),
# concepts_map["mult"]: (10, SyaAssociativity.Right), # precedence greater than plus
# }
# sheerka, context, parser = self.init_parser(concepts_map, sya_def)
#
# list_of_res = parser.parse(context, text)
# assert len(list_of_res) == len(list_of_expected)
#
# for res, expected in zip(list_of_res, list_of_expected):
# wrapper = res.body
# lexer_nodes = res.body.body
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
#
# expected_array = compute_expected_array(concepts_map, text, expected[1])
# assert res.status == expected[0]
# assert lexer_nodes == expected_array
@pytest.mark.parametrize("text", [
    "foo bar (one",
    "foo bar one",
    "foo one two",
    "foo x$!# one",
])
def test_i_cannot_parse_when_concept_almost_found(self, text):
    """
    The text looks like a known concept, but turns out not to be one.
    The parser has to detect that its prediction was incorrect:
    the status must be falsy and the body must be a NOT_FOR_ME concept
    carrying the original text.
    :return:
    """
    sheerka, context, parser = self.init_parser()

    ret_val = parser.parse(context, text)
    assert not ret_val.status

    rejection = ret_val.body
    assert sheerka.isinstance(rejection, BuiltinConcepts.NOT_FOR_ME)
    assert rejection.body == text
@pytest.mark.parametrize("text, expected_result", [
    ("one plus two foo bar baz", [CNC("plus", a="one", b="two"), UTN(" foo bar baz")]),
    ("one plus two foo bar", [CNC("plus", a="one", b="two"), UTN(" foo bar")]),
    ("foo bar one plus two", [UTN("foo bar "), CNC("plus", a="one", b="two")]),
    ("foo bar (one plus two", [UTN("foo bar ("), CNC("plus", a="one", b="two")]),
    ("one plus two a long other b", [CNC("plus", a="one", b="two"), UTN(" a long other b")]),
    ("one plus two a long infixed", [CNC("plus", a="one", b="two"), UTN(" a long infixed")]),
    ("one plus two a long", [CNC("plus", a="one", b="two"), UTN(" a long")]),
    ("one ? a long infixed : two", [CNC("?", a="one", b=UTN("a long infixed"), c="two")]),
    ("one ? a long infix : two", [CNC("?", a="one", b=UTN("a long infix"), c="two")]),
])
def test_i_cannot_parse_when_one_part_is_recognized_but_not_the_rest(self, text, expected_result):
    """
    Only a part of the text matches a known concept, the remainder does not.
    The parser must report a failure (falsy status) while still returning
    a PARSER_RESULT whose nodes are equal to the expected CNC / UTN sequence.
    :return:
    """
    sheerka, context, parser = self.init_parser()

    ret_val = parser.parse(context, text)
    parser_result = ret_val.body
    actual_nodes = parser_result.body
    # the expected nodes are built from the shared concept map and the source text
    expected_nodes = compute_expected_array(cmap, text, expected_result)

    assert not ret_val.status
    assert context.sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    assert actual_nodes == expected_nodes
@pytest.mark.parametrize("text, expected_concept, expected_unrecognized", [
("x$!# prefixed", "prefixed", ["a"]),
@@ -1157,12 +1054,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
("x$!# infix z$!#", "infix", ["a", "b"]),
])
def test_i_cannot_parse_when_unrecognized(self, text, expected_concept, expected_unrecognized):
# concepts_map = {
# "suffixed": Concept("suffixed a").def_var("a"),
# "prefixed": Concept("a prefixed").def_var("a"),
# "infix": Concept("a infix b").def_var("a").def_var("b"),
# "one": Concept("one")
# }
sheerka, context, parser = self.init_parser()
res = parser.parse(context, text)
@@ -1183,13 +1074,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
("one prefixed x$!#", [cnode("__var__0 prefixed", 0, 2, "one prefixed"), utnode(3, 7, " x$!#")]),
])
def test_i_cannot_parse_when_part_of_the_sequence_is_not_recognized(self, text, expected):
# concepts_map = {
# "suffixed": Concept("suffixed a").def_var("a"),
# "prefixed": Concept("a prefixed").def_var("a"),
# "infix": Concept("a infix b").def_var("a").def_var("b"),
# "one": Concept("one"),
# "two": Concept("two"),
# }
sheerka, context, parser = self.init_parser()
res = parser.parse(context, text)
@@ -1203,7 +1087,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("text", [
"one",
"1 + 1",
"x$!# ",
"x$!#",
"twenty one"
"",
"function(not an sya concept)",
@@ -1214,13 +1098,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
Atoms concepts, source code or BNF concepts alone are discarded by the lexer
:return:
"""
# concepts_map = {
# "plus": Concept("a plus b").def_var("a").def_var("b"),
# "one": Concept("one"),
# "two": Concept("two"),
# "three": Concept("three"),
# "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
# }
sheerka, context, parser = self.init_parser()
res = parser.parse(context, text)
File diff suppressed because it is too large Load Diff