Fixed SyaNodeParser false positive recognition issue
This commit is contained in:
@@ -349,9 +349,6 @@ class EnumerationConcept(Concept):
|
|||||||
self.set_value(ConceptParts.BODY, iteration)
|
self.set_value(ConceptParts.BODY, iteration)
|
||||||
self.metadata.is_evaluated = True
|
self.metadata.is_evaluated = True
|
||||||
|
|
||||||
# def __iter__(self):
|
|
||||||
# return iter(self.body)
|
|
||||||
|
|
||||||
|
|
||||||
class ListConcept(Concept):
|
class ListConcept(Concept):
|
||||||
def __init__(self, items=None):
|
def __init__(self, items=None):
|
||||||
@@ -362,21 +359,6 @@ class ListConcept(Concept):
|
|||||||
def append(self, obj):
|
def append(self, obj):
|
||||||
self.body.append(obj)
|
self.body.append(obj)
|
||||||
|
|
||||||
# def __len__(self):
|
|
||||||
# return len(self.body)
|
|
||||||
#
|
|
||||||
# def __getitem__(self, key):
|
|
||||||
# return self.body[key]
|
|
||||||
#
|
|
||||||
# def __setitem__(self, key, value):
|
|
||||||
# self.body[key] = value
|
|
||||||
#
|
|
||||||
# def __iter__(self):
|
|
||||||
# return iter(self.body)
|
|
||||||
#
|
|
||||||
# def __contains__(self, item):
|
|
||||||
# return item in self.body
|
|
||||||
|
|
||||||
|
|
||||||
class FilteredConcept(Concept):
|
class FilteredConcept(Concept):
|
||||||
def __init__(self, filtered=None, iterable=None, predicate=None):
|
def __init__(self, filtered=None, iterable=None, predicate=None):
|
||||||
|
|||||||
@@ -326,6 +326,7 @@ def ensure_evaluated(context, concept):
|
|||||||
|
|
||||||
return evaluated
|
return evaluated
|
||||||
|
|
||||||
|
|
||||||
def get_lexer_nodes_from_unrecognized(context, unrecognized_tokens_node, parsers):
|
def get_lexer_nodes_from_unrecognized(context, unrecognized_tokens_node, parsers):
|
||||||
"""
|
"""
|
||||||
Using parsers, try to recognize concepts from source
|
Using parsers, try to recognize concepts from source
|
||||||
|
|||||||
+3
-3
@@ -221,7 +221,7 @@ class Concept:
|
|||||||
Create the key for this concept.
|
Create the key for this concept.
|
||||||
Must be called only when the concept is fully initialized
|
Must be called only when the concept is fully initialized
|
||||||
|
|
||||||
The method is not called set_key to make sure that no other class set the key by mistake
|
The method is not called 'set_key' to make sure that no other class sets the key by mistake
|
||||||
:param tokens:
|
:param tokens:
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
@@ -248,8 +248,8 @@ class Concept:
|
|||||||
if token.value in variables:
|
if token.value in variables:
|
||||||
key += VARIABLE_PREFIX + str(variables.index(token.value))
|
key += VARIABLE_PREFIX + str(variables.index(token.value))
|
||||||
else:
|
else:
|
||||||
value = token.value[1:-1] if token.type == TokenKind.STRING else token.value
|
#value = token.value[1:-1] if token.type == TokenKind.STRING else token.value
|
||||||
key += value
|
key += token.value
|
||||||
first = False
|
first = False
|
||||||
|
|
||||||
self.metadata.key = key
|
self.metadata.key = key
|
||||||
|
|||||||
@@ -56,12 +56,6 @@ class SheerkaCreateNewConcept:
|
|||||||
return sheerka.ret(self.logger_name, False, ErrorConcept(init_ret_value.value))
|
return sheerka.ret(self.logger_name, False, ErrorConcept(init_ret_value.value))
|
||||||
resolved_concepts_by_first_keyword = init_ret_value.body
|
resolved_concepts_by_first_keyword = init_ret_value.body
|
||||||
|
|
||||||
# update concept definition by key
|
|
||||||
# init_sya_ret_value = self.bnp.initialize(context, [concept], use_sheerka=True)
|
|
||||||
# if not init_sya_ret_value.status:
|
|
||||||
# return sheerka.ret(self.logger_name, False, ErrorConcept(init_sya_ret_value.value))
|
|
||||||
# concepts_by_first_keyword = init_sya_ret_value.body
|
|
||||||
|
|
||||||
concept.freeze_definition_hash()
|
concept.freeze_definition_hash()
|
||||||
|
|
||||||
cache_manager.add_concept(concept)
|
cache_manager.add_concept(concept)
|
||||||
@@ -74,21 +68,3 @@ class SheerkaCreateNewConcept:
|
|||||||
# process the return if needed
|
# process the return if needed
|
||||||
ret = sheerka.ret(self.logger_name, True, sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=concept))
|
ret = sheerka.ret(self.logger_name, True, sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=concept))
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
# def load_concepts_nodes_definitions(self, context):
|
|
||||||
# """
|
|
||||||
# Gets from sdp what is need to parse nodes
|
|
||||||
# :return:
|
|
||||||
# """
|
|
||||||
# sdp = self.sheerka.sdp
|
|
||||||
#
|
|
||||||
# concepts_by_first_keyword = sdp.get(
|
|
||||||
# self.sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY,
|
|
||||||
# load_origin=False) or {}
|
|
||||||
#
|
|
||||||
# init_ret_value = self.bnp.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
|
|
||||||
# if not init_ret_value.status:
|
|
||||||
# return self.sheerka.ret(self.logger_name, False, ErrorConcept(init_ret_value.value))
|
|
||||||
# resolved_concepts_by_first_keyword = init_ret_value.body
|
|
||||||
#
|
|
||||||
# return concepts_by_first_keyword, resolved_concepts_by_first_keyword
|
|
||||||
|
|||||||
@@ -1,8 +1,9 @@
|
|||||||
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
|
|
||||||
import core.utils
|
import core.utils
|
||||||
|
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
|
||||||
|
|
||||||
NO_MATCH = "** No Match **"
|
NO_MATCH = "** No Match **"
|
||||||
|
|
||||||
|
|
||||||
class SheerkaExecute:
|
class SheerkaExecute:
|
||||||
"""
|
"""
|
||||||
Manage the execution of a process flow
|
Manage the execution of a process flow
|
||||||
@@ -58,7 +59,8 @@ class SheerkaExecute:
|
|||||||
# else "'" + BaseParser.get_text_from_tokens(to_parse) + "' as tokens"
|
# else "'" + BaseParser.get_text_from_tokens(to_parse) + "' as tokens"
|
||||||
# execution_context.log(f"Parsing {debug_text}")
|
# execution_context.log(f"Parsing {debug_text}")
|
||||||
|
|
||||||
with execution_context.push(desc=f"Parsing using {parser.name}", logger=parser.verbose_log) as sub_context:
|
with execution_context.push(desc=f"Parsing using {parser.name}",
|
||||||
|
logger=parser.verbose_log) as sub_context:
|
||||||
sub_context.add_inputs(to_parse=to_parse)
|
sub_context.add_inputs(to_parse=to_parse)
|
||||||
res = parser.parse(sub_context, to_parse)
|
res = parser.parse(sub_context, to_parse)
|
||||||
if res is not None:
|
if res is not None:
|
||||||
@@ -86,7 +88,6 @@ class SheerkaExecute:
|
|||||||
stop_processing = True
|
stop_processing = True
|
||||||
sub_context.add_values(return_values=res)
|
sub_context.add_values(return_values=res)
|
||||||
|
|
||||||
|
|
||||||
if stop_processing:
|
if stop_processing:
|
||||||
break # Do not try the other priorities if a match is found
|
break # Do not try the other priorities if a match is found
|
||||||
|
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ class SheerkaModifyConcept:
|
|||||||
|
|
||||||
# TODO : update concept by first keyword
|
# TODO : update concept by first keyword
|
||||||
# TODO : update resolved by first keyword
|
# TODO : update resolved by first keyword
|
||||||
# TODO : update concets grammars
|
# TODO : update concepts grammars
|
||||||
|
|
||||||
ret = self.sheerka.ret(self.logger_name, True, self.sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=concept))
|
ret = self.sheerka.ret(self.logger_name, True, self.sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=concept))
|
||||||
return ret
|
return ret
|
||||||
|
|||||||
@@ -1,8 +1,6 @@
|
|||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
from sdp.sheerkaSerializer import Serializer
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Variable:
|
class Variable:
|
||||||
|
|||||||
@@ -60,10 +60,6 @@ class Sheerka(Concept):
|
|||||||
|
|
||||||
self.bnp = None # reference to the BaseNodeParser class (to compute first keyword token)
|
self.bnp = None # reference to the BaseNodeParser class (to compute first keyword token)
|
||||||
|
|
||||||
# # Cache for concepts grammars
|
|
||||||
# # To be shared between BNFNode parsers instances
|
|
||||||
# self.concepts_grammars = {}
|
|
||||||
|
|
||||||
# a concept can be instantiated
|
# a concept can be instantiated
|
||||||
# ex: File is a concept, but File('foo.txt') is an instance
|
# ex: File is a concept, but File('foo.txt') is an instance
|
||||||
# TODO: manage contexts
|
# TODO: manage contexts
|
||||||
@@ -303,27 +299,6 @@ class Sheerka(Concept):
|
|||||||
res = self.bnp.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
|
res = self.bnp.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
|
||||||
self.cache_manager.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, res.body)
|
self.cache_manager.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, res.body)
|
||||||
|
|
||||||
# sya = self.bnf.resolve_sya_associativity_and_precedence()
|
|
||||||
# self.cache_manager.put(self.RESOLVED_CONCEPTS_SYA_DEFINITION_ENTRY, sya)
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# self.concepts_by_first_keyword, \
|
|
||||||
# self.resolved_concepts_by_first_keyword = \
|
|
||||||
# self.create_new_concept_handler.load_concepts_nodes_definitions(context)
|
|
||||||
|
|
||||||
# self.concepts_by_first_keyword = self.sdp.get_safe(
|
|
||||||
# self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY,
|
|
||||||
# load_origin=False) or {}
|
|
||||||
#
|
|
||||||
# self.sya_definitions = self.sdp.get_safe(
|
|
||||||
# self.CONCEPTS_SYA_DEFINITION_ENTRY,
|
|
||||||
# load_origin=False) or {}
|
|
||||||
#
|
|
||||||
# init_ret_value = self.bnp.resolve_concepts_by_first_keyword(self, self.concepts_by_first_keyword)
|
|
||||||
# if not init_ret_value.status:
|
|
||||||
# return self.sheerka.ret(self.logger_name, False, ErrorConcept(init_ret_value.value))
|
|
||||||
# self.resolved_concepts_by_first_keyword = init_ret_value.body
|
|
||||||
|
|
||||||
def reset(self, cache_only=False):
|
def reset(self, cache_only=False):
|
||||||
self.cache_manager.clear()
|
self.cache_manager.clear()
|
||||||
self.cache_manager.cache_only = cache_only
|
self.cache_manager.cache_only = cache_only
|
||||||
@@ -346,7 +321,6 @@ class Sheerka(Concept):
|
|||||||
with ExecutionContext(self.key, event, self, f"Evaluating '{text}'", self.log) as execution_context:
|
with ExecutionContext(self.key, event, self, f"Evaluating '{text}'", self.log) as execution_context:
|
||||||
user_input = self.ret(self.name, True, self.new(BuiltinConcepts.USER_INPUT, body=text, user_name=user_name))
|
user_input = self.ret(self.name, True, self.new(BuiltinConcepts.USER_INPUT, body=text, user_name=user_name))
|
||||||
reduce_requested = self.ret(self.name, True, self.new(BuiltinConcepts.REDUCE_REQUESTED))
|
reduce_requested = self.ret(self.name, True, self.new(BuiltinConcepts.REDUCE_REQUESTED))
|
||||||
# execution_context.local_hints.add(BuiltinConcepts.EVAL_WHERE_REQUESTED)
|
|
||||||
|
|
||||||
steps = [
|
steps = [
|
||||||
BuiltinConcepts.BEFORE_PARSING,
|
BuiltinConcepts.BEFORE_PARSING,
|
||||||
@@ -525,28 +499,6 @@ class Sheerka(Concept):
|
|||||||
|
|
||||||
return concept
|
return concept
|
||||||
|
|
||||||
#
|
|
||||||
# def get(self, concept_key, concept_id=None):
|
|
||||||
# """
|
|
||||||
# Tries to find a concept
|
|
||||||
# What is return must be used a template for another concept.
|
|
||||||
# You must not modify the returned concept
|
|
||||||
# :param concept_key: key of the concept
|
|
||||||
# :param concept_id: when multiple concepts with the same key, use the id
|
|
||||||
# :return:
|
|
||||||
# """
|
|
||||||
#
|
|
||||||
# by_key = self.get_by_key(concept_key)
|
|
||||||
# if self.is_known(by_key):
|
|
||||||
# return by_key
|
|
||||||
#
|
|
||||||
# # else return by name
|
|
||||||
# by_name = self.get_by_name(concept_key)
|
|
||||||
# if self.is_known(by_name):
|
|
||||||
# return by_name
|
|
||||||
#
|
|
||||||
# return by_key # return not found for key
|
|
||||||
|
|
||||||
def get_by_key(self, concept_key, concept_id=None):
|
def get_by_key(self, concept_key, concept_id=None):
|
||||||
concept_key = str(concept_key) if isinstance(concept_key, BuiltinConcepts) else concept_key
|
concept_key = str(concept_key) if isinstance(concept_key, BuiltinConcepts) else concept_key
|
||||||
return self.internal_get("key", concept_key, self.CONCEPTS_BY_KEY_ENTRY, concept_id)
|
return self.internal_get("key", concept_key, self.CONCEPTS_BY_KEY_ENTRY, concept_id)
|
||||||
|
|||||||
+32
-2
@@ -1,4 +1,4 @@
|
|||||||
from dataclasses import dataclass
|
from dataclasses import dataclass, field
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
|
||||||
|
|
||||||
@@ -48,6 +48,7 @@ class TokenKind(Enum):
|
|||||||
DEGREE = "degree" # °
|
DEGREE = "degree" # °
|
||||||
WORD = "word"
|
WORD = "word"
|
||||||
EQUALSEQUALS = "=="
|
EQUALSEQUALS = "=="
|
||||||
|
VAR_DEF = "__var__"
|
||||||
|
|
||||||
|
|
||||||
@dataclass()
|
@dataclass()
|
||||||
@@ -58,6 +59,8 @@ class Token:
|
|||||||
line: int
|
line: int
|
||||||
column: int
|
column: int
|
||||||
|
|
||||||
|
_str_value: str = field(default=None, repr=False, compare=False, hash=None)
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
if self.type == TokenKind.IDENTIFIER:
|
if self.type == TokenKind.IDENTIFIER:
|
||||||
value = str(self.value)
|
value = str(self.value)
|
||||||
@@ -72,6 +75,23 @@ class Token:
|
|||||||
|
|
||||||
return f"Token({value})"
|
return f"Token({value})"
|
||||||
|
|
||||||
|
@property
|
||||||
|
def str_value(self):
|
||||||
|
if self._str_value:
|
||||||
|
return self._str_value
|
||||||
|
|
||||||
|
if self.type == TokenKind.STRING:
|
||||||
|
self._str_value = self.value[1:-1]
|
||||||
|
elif self.type == TokenKind.KEYWORD:
|
||||||
|
self._str_value = self.value.value
|
||||||
|
else:
|
||||||
|
self._str_value = str(self.value)
|
||||||
|
return self._str_value
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def is_whitespace(token):
|
||||||
|
return token and token.type == TokenKind.WHITESPACE
|
||||||
|
|
||||||
|
|
||||||
@dataclass()
|
@dataclass()
|
||||||
class LexerError(Exception):
|
class LexerError(Exception):
|
||||||
@@ -101,12 +121,13 @@ class Tokenizer:
|
|||||||
|
|
||||||
KEYWORDS = set(x.value for x in Keywords)
|
KEYWORDS = set(x.value for x in Keywords)
|
||||||
|
|
||||||
def __init__(self, text, parse_word=False):
|
def __init__(self, text, yield_eof=True, parse_word=False):
|
||||||
self.text = text
|
self.text = text
|
||||||
self.text_len = len(text)
|
self.text_len = len(text)
|
||||||
self.column = 1
|
self.column = 1
|
||||||
self.line = 1
|
self.line = 1
|
||||||
self.i = 0
|
self.i = 0
|
||||||
|
self.yield_eof = yield_eof
|
||||||
self.parse_word = parse_word
|
self.parse_word = parse_word
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
@@ -134,6 +155,7 @@ class Tokenizer:
|
|||||||
self.i += 1
|
self.i += 1
|
||||||
self.column += 1
|
self.column += 1
|
||||||
elif c == "_":
|
elif c == "_":
|
||||||
|
from core.concept import VARIABLE_PREFIX
|
||||||
if self.i + 1 < self.text_len and self.text[self.i + 1].isalpha():
|
if self.i + 1 < self.text_len and self.text[self.i + 1].isalpha():
|
||||||
identifier = self.eat_identifier(self.i)
|
identifier = self.eat_identifier(self.i)
|
||||||
token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER
|
token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER
|
||||||
@@ -141,6 +163,13 @@ class Tokenizer:
|
|||||||
yield Token(token_type, value, self.i, self.line, self.column)
|
yield Token(token_type, value, self.i, self.line, self.column)
|
||||||
self.i += len(identifier)
|
self.i += len(identifier)
|
||||||
self.column += len(identifier)
|
self.column += len(identifier)
|
||||||
|
elif self.i + 7 < self.text_len and \
|
||||||
|
self.text[self.i: self.i + 7] == VARIABLE_PREFIX and \
|
||||||
|
self.text[self.i + 7].isdigit():
|
||||||
|
number = self.eat_number(self.i + 7)
|
||||||
|
yield Token(TokenKind.VAR_DEF, VARIABLE_PREFIX + number, self.i, self.line, self.column)
|
||||||
|
self.i += 7 + len(number)
|
||||||
|
self.column += 7 + len(number)
|
||||||
else:
|
else:
|
||||||
yield Token(TokenKind.UNDERSCORE, "_", self.i, self.line, self.column)
|
yield Token(TokenKind.UNDERSCORE, "_", self.i, self.line, self.column)
|
||||||
self.i += 1
|
self.i += 1
|
||||||
@@ -308,6 +337,7 @@ class Tokenizer:
|
|||||||
else:
|
else:
|
||||||
raise LexerError(f"Unknown token '{c}'", self.text, self.i, self.line, self.column)
|
raise LexerError(f"Unknown token '{c}'", self.text, self.i, self.line, self.column)
|
||||||
|
|
||||||
|
if self.yield_eof:
|
||||||
yield Token(TokenKind.EOF, "", self.i, self.line, self.column)
|
yield Token(TokenKind.EOF, "", self.i, self.line, self.column)
|
||||||
|
|
||||||
def eat_concept(self, start, line, column):
|
def eat_concept(self, start, line, column):
|
||||||
|
|||||||
@@ -91,7 +91,7 @@ class AtomConceptParserHelper:
|
|||||||
|
|
||||||
self.debug.append(token)
|
self.debug.append(token)
|
||||||
|
|
||||||
if self.expected_tokens[0] != BaseNodeParser.get_token_value(token):
|
if self.expected_tokens[0] != token.str_value:
|
||||||
self.errors.append(UnexpectedTokenErrorNode(
|
self.errors.append(UnexpectedTokenErrorNode(
|
||||||
f"Found '{token}' while expecting '{self.expected_tokens[0]}'",
|
f"Found '{token}' while expecting '{self.expected_tokens[0]}'",
|
||||||
token,
|
token,
|
||||||
@@ -119,7 +119,7 @@ class AtomConceptParserHelper:
|
|||||||
forked.eat_concept(concept, pos)
|
forked.eat_concept(concept, pos)
|
||||||
|
|
||||||
concept_node = ConceptNode(concept, pos, pos)
|
concept_node = ConceptNode(concept, pos, pos)
|
||||||
expected = [BaseNodeParser.get_token_value(t) for t in Tokenizer(concept.name)][1:-1]
|
expected = [t.str_value for t in Tokenizer(concept.name)][1:-1]
|
||||||
|
|
||||||
if not expected:
|
if not expected:
|
||||||
# the concept is already matched
|
# the concept is already matched
|
||||||
|
|||||||
@@ -53,9 +53,6 @@ class UnrecognizedTokensNode(LexerNode):
|
|||||||
self.is_frozen = False
|
self.is_frozen = False
|
||||||
self.parenthesis_count = 0
|
self.parenthesis_count = 0
|
||||||
|
|
||||||
def has_open_paren(self):
|
|
||||||
return self.parenthesis_count > 0
|
|
||||||
|
|
||||||
def add_token(self, token, pos):
|
def add_token(self, token, pos):
|
||||||
if self.is_frozen:
|
if self.is_frozen:
|
||||||
raise Exception("The node is frozen")
|
raise Exception("The node is frozen")
|
||||||
@@ -78,6 +75,21 @@ class UnrecognizedTokensNode(LexerNode):
|
|||||||
|
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
def pop(self, token_kind):
|
||||||
|
if self.is_frozen:
|
||||||
|
raise Exception("The node is frozen")
|
||||||
|
|
||||||
|
if len(self.tokens) > 0 and self.tokens[-1].type == token_kind:
|
||||||
|
self.tokens.pop()
|
||||||
|
if len(self.tokens) == 0:
|
||||||
|
self.reset()
|
||||||
|
else:
|
||||||
|
self.end -= 1
|
||||||
|
|
||||||
|
|
||||||
|
def has_open_paren(self):
|
||||||
|
return self.parenthesis_count > 0
|
||||||
|
|
||||||
def not_whitespace(self):
|
def not_whitespace(self):
|
||||||
return not self.is_whitespace()
|
return not self.is_whitespace()
|
||||||
|
|
||||||
@@ -90,6 +102,11 @@ class UnrecognizedTokensNode(LexerNode):
|
|||||||
def is_empty(self):
|
def is_empty(self):
|
||||||
return len(self.tokens) == 0
|
return len(self.tokens) == 0
|
||||||
|
|
||||||
|
def last_token_type(self):
|
||||||
|
if len(self.tokens) == 0:
|
||||||
|
return None
|
||||||
|
return self.tokens[-1].type
|
||||||
|
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
if isinstance(other, utnode):
|
if isinstance(other, utnode):
|
||||||
return self.start == other.start and \
|
return self.start == other.start and \
|
||||||
@@ -676,15 +693,6 @@ class BaseNodeParser(BaseParser):
|
|||||||
|
|
||||||
return custom_concepts if custom else None
|
return custom_concepts if custom else None
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def get_token_value(token):
|
|
||||||
if token.type == TokenKind.STRING:
|
|
||||||
return token.value[1:-1]
|
|
||||||
elif token.type == TokenKind.KEYWORD:
|
|
||||||
return token.value.value
|
|
||||||
else:
|
|
||||||
return token.value
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_concepts_by_first_keyword(context, concepts, use_sheerka=False):
|
def get_concepts_by_first_keyword(context, concepts, use_sheerka=False):
|
||||||
"""
|
"""
|
||||||
|
|||||||
+189
-74
@@ -1,15 +1,16 @@
|
|||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
|
from operator import attrgetter
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
from core import builtin_helpers
|
from core import builtin_helpers
|
||||||
from core.builtin_concepts import BuiltinConcepts
|
from core.builtin_concepts import BuiltinConcepts
|
||||||
from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF
|
from core.concept import Concept, DEFINITION_TYPE_BNF
|
||||||
from core.sheerka.ExecutionContext import ExecutionContext
|
from core.sheerka.ExecutionContext import ExecutionContext
|
||||||
from core.tokenizer import Token, TokenKind
|
from core.tokenizer import Token, TokenKind, Tokenizer
|
||||||
from parsers.BaseNodeParser import UnrecognizedTokensNode, ConceptNode, SourceCodeNode, SyaAssociativity, \
|
from parsers.BaseNodeParser import UnrecognizedTokensNode, ConceptNode, SourceCodeNode, SyaAssociativity, \
|
||||||
SourceCodeWithConceptNode, BaseNodeParser
|
SourceCodeWithConceptNode, BaseNodeParser
|
||||||
from parsers.BaseParser import ErrorNode, UnexpectedTokenErrorNode
|
from parsers.BaseParser import ErrorNode
|
||||||
|
|
||||||
PARSERS = ["BnfNode", "AtomNode", "Python"]
|
PARSERS = ["BnfNode", "AtomNode", "Python"]
|
||||||
|
|
||||||
@@ -88,10 +89,13 @@ class SyaConceptParserHelper:
|
|||||||
concept: Concept
|
concept: Concept
|
||||||
start: int # position of the token in the tokenizer (Caution, it is not token.index)
|
start: int # position of the token in the tokenizer (Caution, it is not token.index)
|
||||||
end: int = field(default=-1, repr=False, compare=False, hash=None)
|
end: int = field(default=-1, repr=False, compare=False, hash=None)
|
||||||
expected: List[str] = field(default_factory=list, repr=False, compare=False, hash=None)
|
expected: List[Token] = field(default_factory=list, repr=False, compare=False, hash=None)
|
||||||
expected_parameters_before_first_token: int = field(default=0, repr=False, compare=False, hash=None)
|
expected_parameters_before_first_token: int = field(default=0, repr=False, compare=False, hash=None)
|
||||||
|
last_token_before_first_token: Token = field(default=None, repr=False, compare=False, hash=None)
|
||||||
potential_pos: int = field(default=-1, repr=False, compare=False, hash=None)
|
potential_pos: int = field(default=-1, repr=False, compare=False, hash=None)
|
||||||
parameters_list_at_init: list = field(default_factory=list, repr=False, compare=False, hash=None)
|
parameters_list_at_init: list = field(default_factory=list, repr=False, compare=False, hash=None)
|
||||||
|
tokens: List[Token] = field(default_factory=list, repr=False, compare=False, hash=None) # tokens eaten
|
||||||
|
remember_whitespace: Token = field(default=None, repr=False, compare=False, hash=None)
|
||||||
error: str = None
|
error: str = None
|
||||||
|
|
||||||
def __post_init__(self):
|
def __post_init__(self):
|
||||||
@@ -99,17 +103,20 @@ class SyaConceptParserHelper:
|
|||||||
if self.end == -1:
|
if self.end == -1:
|
||||||
self.end = self.start
|
self.end = self.start
|
||||||
|
|
||||||
first_keyword_found = False
|
first_keyword_found = None
|
||||||
for name in concept.key.split():
|
for token in Tokenizer(concept.key, yield_eof=False):
|
||||||
if not name.startswith(VARIABLE_PREFIX) and not first_keyword_found:
|
if not first_keyword_found and token.type != TokenKind.WHITESPACE and token.type != TokenKind.VAR_DEF:
|
||||||
first_keyword_found = True
|
first_keyword_found = token
|
||||||
|
|
||||||
if first_keyword_found:
|
if first_keyword_found:
|
||||||
self.expected.append(name)
|
self.expected.append(token)
|
||||||
else:
|
else:
|
||||||
|
self.last_token_before_first_token = token
|
||||||
|
if token.type != TokenKind.WHITESPACE:
|
||||||
self.expected_parameters_before_first_token += 1
|
self.expected_parameters_before_first_token += 1
|
||||||
|
|
||||||
self.eat_token() # remove the fist token
|
self.eat_token(first_keyword_found) # remove the first token
|
||||||
|
self.tokens.append(first_keyword_found)
|
||||||
|
|
||||||
def is_matched(self):
|
def is_matched(self):
|
||||||
return len(self.expected) == 0
|
return len(self.expected) == 0
|
||||||
@@ -117,23 +124,38 @@ class SyaConceptParserHelper:
|
|||||||
def is_atom(self):
|
def is_atom(self):
|
||||||
return len(self.concept.concept.metadata.variables) == 0 and len(self.expected) == 0
|
return len(self.concept.concept.metadata.variables) == 0 and len(self.expected) == 0
|
||||||
|
|
||||||
def is_expected(self, token):
|
def is_next(self, token):
|
||||||
if self.is_matched():
|
if self.is_matched() or len(self.expected) == 0:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
token_value = BaseNodeParser.get_token_value(token)
|
# True if the next token is the one that is expected
|
||||||
|
# Or if the next token is a whitespace and the expected one is the one after
|
||||||
|
# (an expected whitespace is not always mandatory)
|
||||||
|
return token.str_value == self.expected[0].str_value or \
|
||||||
|
self.expected[0].type == TokenKind.WHITESPACE and token.str_value == self.expected[1].str_value
|
||||||
|
|
||||||
|
def is_expected(self, token):
|
||||||
|
if self.is_matched() or token.type == TokenKind.WHITESPACE:
|
||||||
|
return False
|
||||||
|
|
||||||
for expected in self.expected:
|
for expected in self.expected:
|
||||||
if not expected.startswith(VARIABLE_PREFIX) and expected == token_value:
|
if expected.type != TokenKind.VAR_DEF and expected.str_value == token.str_value:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def expected_parameters(self):
|
def expected_parameters(self):
|
||||||
return sum(map(lambda e: e.startswith(VARIABLE_PREFIX), self.expected))
|
return sum(map(lambda e: e.type == TokenKind.VAR_DEF, self.expected))
|
||||||
|
|
||||||
def eat_token(self):
|
def eat_token(self, until_token):
|
||||||
# No check, as it is used only after is_expected
|
"""
|
||||||
|
Eat the expected tokens up to and including 'until_token'
|
||||||
|
:param until_token:
|
||||||
|
:return:
|
||||||
|
"""
|
||||||
|
# No check, as it is used only after is_expected() or is_next()
|
||||||
|
while self.expected[0].str_value != until_token.str_value:
|
||||||
|
del self.expected[0]
|
||||||
del self.expected[0]
|
del self.expected[0]
|
||||||
|
|
||||||
# return True if a whole sequence of keywords is eaten
|
# return True if a whole sequence of keywords is eaten
|
||||||
@@ -143,7 +165,10 @@ class SyaConceptParserHelper:
|
|||||||
if len(self.expected) == 0:
|
if len(self.expected) == 0:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
return self.expected[0].startswith(VARIABLE_PREFIX)
|
# also return True at the end of a name sequence
|
||||||
|
# ... <var0> bar baz qux <var1>
|
||||||
|
# return True after 'qux', to indicate all the parameters from <var0> must be processed
|
||||||
|
return self.expected[0].type == TokenKind.VAR_DEF
|
||||||
|
|
||||||
def eat_parameter(self, parameter):
|
def eat_parameter(self, parameter):
|
||||||
if self.is_matched() and parameter == self:
|
if self.is_matched() and parameter == self:
|
||||||
@@ -153,7 +178,7 @@ class SyaConceptParserHelper:
|
|||||||
self.error = "No more parameter expected"
|
self.error = "No more parameter expected"
|
||||||
return
|
return
|
||||||
|
|
||||||
if not self.expected[0].startswith(VARIABLE_PREFIX):
|
if self.expected[0].type != TokenKind.VAR_DEF:
|
||||||
self.error = "Parameter was not expected"
|
self.error = "Parameter was not expected"
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -202,6 +227,7 @@ class InFixToPostFix:
|
|||||||
self.errors = [] # Not quite sure that I can handle more than one error
|
self.errors = [] # Not quite sure that I can handle more than one error
|
||||||
|
|
||||||
self.debug = []
|
self.debug = []
|
||||||
|
self.false_positives = [] # concepts that look like known ones but are not (for debug purposes)
|
||||||
self.forked = [] # used to fork InFixToPostFix when multiple parsers recognize the unrecognized_tokens
|
self.forked = [] # used to fork InFixToPostFix when multiple parsers recognize the unrecognized_tokens
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
@@ -245,7 +271,6 @@ class InFixToPostFix:
|
|||||||
Note that when we are parsing non recognized tokens,
|
Note that when we are parsing non recognized tokens,
|
||||||
we consider that the parenthesis are part of the non recognized
|
we consider that the parenthesis are part of the non recognized
|
||||||
:param token:
|
:param token:
|
||||||
:param stack:
|
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
return isinstance(token, Token) and token.type == TokenKind.RPAR
|
return isinstance(token, Token) and token.type == TokenKind.RPAR
|
||||||
@@ -268,10 +293,10 @@ class InFixToPostFix:
|
|||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
if isinstance(item, SyaConceptParserHelper) and len(item.expected) > 0 and not item.error:
|
if isinstance(item, SyaConceptParserHelper) and len(item.expected) > 0 and not item.error:
|
||||||
if item.expected[0].startswith(VARIABLE_PREFIX):
|
if item.expected[0].type == TokenKind.VAR_DEF:
|
||||||
item.error = "Not enough suffix parameters"
|
item.error = "Not enough suffix parameters"
|
||||||
else:
|
else:
|
||||||
item.error = f"token '{item.expected[0]}' not found"
|
item.error = f"token '{item.expected[0].str_value}' not found"
|
||||||
|
|
||||||
if isinstance(item, SyaConceptParserHelper) and item.potential_pos != -1:
|
if isinstance(item, SyaConceptParserHelper) and item.potential_pos != -1:
|
||||||
self.out.insert(item.potential_pos, item)
|
self.out.insert(item.potential_pos, item)
|
||||||
@@ -328,6 +353,16 @@ class InFixToPostFix:
|
|||||||
).pseudo_fix_source()
|
).pseudo_fix_source()
|
||||||
return source_code
|
return source_code
|
||||||
|
|
||||||
|
def _transform_to_unrecognized(self, parser_helper):
|
||||||
|
# an UnrecognizedTokensNode was sent to 'out' prematurely: take it back
|
||||||
|
if len(self.out) > 0 and isinstance(self.out[-1], UnrecognizedTokensNode):
|
||||||
|
self.unrecognized_tokens = self.out.pop()
|
||||||
|
|
||||||
|
if parser_helper.remember_whitespace:
|
||||||
|
self.unrecognized_tokens.add_token(parser_helper.remember_whitespace, parser_helper.start - 1)
|
||||||
|
for i, token in enumerate(parser_helper.tokens):
|
||||||
|
self.unrecognized_tokens.add_token(token, parser_helper.start + i)
|
||||||
|
|
||||||
def get_errors(self):
|
def get_errors(self):
|
||||||
res = []
|
res = []
|
||||||
res.extend(self.errors)
|
res.extend(self.errors)
|
||||||
@@ -343,28 +378,28 @@ class InFixToPostFix:
|
|||||||
|
|
||||||
self.is_locked = False
|
self.is_locked = False
|
||||||
|
|
||||||
def manage_parameters_when_new_concept(self, temp_concept_node):
|
def manage_parameters_when_new_concept(self, parser_helper):
|
||||||
"""
|
"""
|
||||||
When a new concept is created, we need to check what to do with the parameters
|
When a new concept is created, we need to check what to do with the parameters
|
||||||
that were queued
|
that were queued
|
||||||
:param temp_concept_node: new concept
|
:param parser_helper: new concept
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
if len(self.parameters_list) < temp_concept_node.expected_parameters_before_first_token:
|
if len(self.parameters_list) < parser_helper.expected_parameters_before_first_token:
|
||||||
# The new concept expects some prefix parameters, but there are not enough
|
# The new concept expects some prefix parameters, but there are not enough
|
||||||
temp_concept_node.error = "Not enough prefix parameters"
|
parser_helper.error = "Not enough prefix parameters"
|
||||||
return
|
return
|
||||||
|
|
||||||
if len(self.parameters_list) > temp_concept_node.expected_parameters_before_first_token:
|
if len(self.parameters_list) > parser_helper.expected_parameters_before_first_token:
|
||||||
# There are more parameters than needed by the new concept
|
# There are more parameters than needed by the new concept
|
||||||
# The others are either
|
# The others are either
|
||||||
# - parameters for the previous concept (if any)
|
# - parameters for the previous concept (if any)
|
||||||
# - concepts on their own
|
# - concepts on their own
|
||||||
# - syntax error
|
# - syntax error
|
||||||
# In all cases, the only thing that matters is to pop what is expected by the new concept
|
# In all cases, the only thing that matters is to pop what is expected by the new concept
|
||||||
for i in range(temp_concept_node.expected_parameters_before_first_token):
|
for i in range(parser_helper.expected_parameters_before_first_token):
|
||||||
self.parameters_list.pop()
|
self.parameters_list.pop()
|
||||||
temp_concept_node.parameters_list_at_init.extend(self.parameters_list)
|
parser_helper.parameters_list_at_init.extend(self.parameters_list)
|
||||||
return
|
return
|
||||||
|
|
||||||
# len(self.parameters_list) == temp_concept_node.expected_parameters_before_first_token
|
# len(self.parameters_list) == temp_concept_node.expected_parameters_before_first_token
|
||||||
@@ -385,14 +420,18 @@ class InFixToPostFix:
|
|||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# manage parenthesis that didn't find any match
|
||||||
|
if self._is_lpar(self.stack[-1]):
|
||||||
|
self._add_error(ParenthesisMismatchErrorNode(self.stack[-1]))
|
||||||
|
|
||||||
# The parameter must be part of the current concept being parsed
|
# The parameter must be part of the current concept being parsed
|
||||||
assert len(self._concepts()) != 0 # sanity check
|
assert len(self._concepts()) != 0 # sanity check
|
||||||
|
|
||||||
current_concept = self._concepts()[-1]
|
current_concept = self._concepts()[-1]
|
||||||
while len(current_concept.expected) > 0 and current_concept.expected[0].startswith(VARIABLE_PREFIX):
|
while len(current_concept.expected) > 0 and current_concept.expected[0].type == TokenKind.VAR_DEF:
|
||||||
# eat everything that was expected
|
# eat everything that was expected
|
||||||
if len(self.parameters_list) == 0:
|
if len(self.parameters_list) == 0:
|
||||||
# current_concept.error = f"Failed to match parameter '{current_concept.expected[0]}'"
|
current_concept.error = f"Failed to match parameter '{current_concept.expected[0].str_value}'"
|
||||||
return
|
return
|
||||||
del self.parameters_list[0]
|
del self.parameters_list[0]
|
||||||
del current_concept.expected[0]
|
del current_concept.expected[0]
|
||||||
@@ -506,6 +545,11 @@ class InFixToPostFix:
|
|||||||
if stack.associativity == SyaAssociativity.No and current.associativity == SyaAssociativity.No:
|
if stack.associativity == SyaAssociativity.No and current.associativity == SyaAssociativity.No:
|
||||||
self._add_error(NoneAssociativeSequenceErrorNode(current.concept, stack_head.start, concept_node.start))
|
self._add_error(NoneAssociativeSequenceErrorNode(current.concept, stack_head.start, concept_node.start))
|
||||||
|
|
||||||
|
if not current.precedence:
|
||||||
|
# precedence is not set (None or zero)
|
||||||
|
# Do not apply any rule
|
||||||
|
return False
|
||||||
|
|
||||||
if current.associativity == SyaAssociativity.Left and current.precedence <= stack.precedence:
|
if current.associativity == SyaAssociativity.Left and current.precedence <= stack.precedence:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
@@ -528,9 +572,55 @@ class InFixToPostFix:
|
|||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
def _pop_stack(c):
|
||||||
|
while self.stack[-1] != c and not self._is_lpar(c):
|
||||||
|
self.pop_stack_to_out()
|
||||||
|
|
||||||
|
if self._is_lpar(self.stack[-1]):
|
||||||
|
self._add_error(ParenthesisMismatchErrorNode(self.stack[-1]))
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Manage concepts ending with long names
|
||||||
|
if self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1].is_matched():
|
||||||
|
self.pop_stack_to_out()
|
||||||
|
|
||||||
for current_concept in reversed(self._concepts()):
|
for current_concept in reversed(self._concepts()):
|
||||||
|
# As I may lose my memory again ;-)
|
||||||
|
# it's a reversed loop to manage cases like
|
||||||
|
# if a plus b then ...
|
||||||
|
# The current concept is 'plus', but the token is 'then'
|
||||||
|
# It means that I have finished parsing the 'plus' and started the second part of the 'if'
|
||||||
|
|
||||||
|
if current_concept.is_next(token):
|
||||||
|
current_concept.end = pos
|
||||||
|
current_concept.tokens.append(token)
|
||||||
|
if current_concept.eat_token(token):
|
||||||
|
_pop_stack(current_concept)
|
||||||
|
return True
|
||||||
|
|
||||||
|
if len(current_concept.expected) > 0 and current_concept.expected[0].type != TokenKind.VAR_DEF:
|
||||||
|
if current_concept.expected[0].type == TokenKind.WHITESPACE:
|
||||||
|
# drop it. It's the case where an optional whitespace is missing
|
||||||
|
del (current_concept.expected[0])
|
||||||
|
else:
|
||||||
|
# error
|
||||||
|
# We are not parsing the concept we thought we were parsing.
|
||||||
|
# Transform the eaten tokens into unrecognized
|
||||||
|
# and discard the current SyaConceptParserHelper
|
||||||
|
# TODO: manage the pending LPAR, RPAR ?
|
||||||
|
self._transform_to_unrecognized(current_concept)
|
||||||
|
self.false_positives.append(current_concept)
|
||||||
|
self.stack.pop()
|
||||||
|
return False
|
||||||
|
|
||||||
if current_concept.is_expected(token):
|
if current_concept.is_expected(token):
|
||||||
|
|
||||||
|
# Fix the whitespace between var and expected if needed
|
||||||
|
# current_concept.expected[0] is '<var>'
|
||||||
|
# current_concept.expected[1] is what separates var from expected (normally a whitespace)
|
||||||
|
if current_concept.expected[1].type == TokenKind.WHITESPACE:
|
||||||
|
self.unrecognized_tokens.pop(TokenKind.WHITESPACE)
|
||||||
|
|
||||||
current_concept.end = pos
|
current_concept.end = pos
|
||||||
self.manage_unrecognized()
|
self.manage_unrecognized()
|
||||||
# manage that some clones may have been forked
|
# manage that some clones may have been forked
|
||||||
@@ -550,36 +640,33 @@ class InFixToPostFix:
|
|||||||
self.parameters_list[:]))
|
self.parameters_list[:]))
|
||||||
return True # no need to continue
|
return True # no need to continue
|
||||||
|
|
||||||
while self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1].is_matched():
|
while self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1] != current_concept:
|
||||||
|
current = self.stack[-1]
|
||||||
|
if current.error:
|
||||||
|
self._transform_to_unrecognized(current)
|
||||||
|
self.false_positives.append(current)
|
||||||
|
self.stack.pop()
|
||||||
|
|
||||||
|
if current_concept.expected[1].type == TokenKind.WHITESPACE:
|
||||||
|
self.unrecognized_tokens.pop(TokenKind.WHITESPACE)
|
||||||
|
|
||||||
|
self.manage_unrecognized()
|
||||||
|
# manage that some clones may have been forked
|
||||||
|
for forked in self.forked:
|
||||||
|
forked.handle_expected_token(token, pos)
|
||||||
|
else:
|
||||||
self.pop_stack_to_out()
|
self.pop_stack_to_out()
|
||||||
self.manage_parameters()
|
self.manage_parameters()
|
||||||
|
|
||||||
if current_concept.eat_token():
|
# maybe eat whitespace that was between <var> and expected token
|
||||||
while self.stack[-1] != current_concept and not self._is_lpar(current_concept):
|
if current_concept.expected[0].type == TokenKind.WHITESPACE:
|
||||||
self.pop_stack_to_out()
|
del current_concept.expected[0]
|
||||||
|
|
||||||
if self._is_lpar(self.stack[-1]):
|
if current_concept.eat_token(token):
|
||||||
self._add_error(ParenthesisMismatchErrorNode(self.stack[-1]))
|
_pop_stack(current_concept)
|
||||||
return False
|
|
||||||
|
|
||||||
# Manage concepts ending with long names
|
|
||||||
if self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1].is_matched():
|
|
||||||
self.pop_stack_to_out()
|
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# else:
|
|
||||||
# if token.type != TokenKind.WHITESPACE:
|
|
||||||
# # hack, because whitespaces are not correctly parsed in self.expected
|
|
||||||
# # KSI 2020/04/25
|
|
||||||
# # I no longer understand why we are in a loop (the reverse one)
|
|
||||||
# # if we are parsing a concept and the expected token does not match
|
|
||||||
# # The whole class should be in error
|
|
||||||
# self._add_error(UnexpectedTokenErrorNode(
|
|
||||||
# f"Failed to parse '{current_concept.concept.concept}'",
|
|
||||||
# token, current_concept.expected))
|
|
||||||
# return False
|
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def eat_token(self, token, pos):
|
def eat_token(self, token, pos):
|
||||||
@@ -692,10 +779,11 @@ class InFixToPostFix:
|
|||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def eat_concept(self, sya_concept_def, pos):
|
def eat_concept(self, sya_concept_def, token, pos):
|
||||||
"""
|
"""
|
||||||
a concept is found
|
a concept is found
|
||||||
:param sya_concept_def:
|
:param sya_concept_def:
|
||||||
|
:param token:
|
||||||
:param pos:
|
:param pos:
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
@@ -704,37 +792,43 @@ class InFixToPostFix:
|
|||||||
return
|
return
|
||||||
self.debug.append(sya_concept_def)
|
self.debug.append(sya_concept_def)
|
||||||
|
|
||||||
temp_concept_node = SyaConceptParserHelper(sya_concept_def, pos)
|
parser_helper = SyaConceptParserHelper(sya_concept_def, pos)
|
||||||
|
|
||||||
|
if self.unrecognized_tokens.last_token_type() == TokenKind.WHITESPACE:
|
||||||
|
parser_helper.remember_whitespace = self.unrecognized_tokens.tokens[-1]
|
||||||
|
|
||||||
|
if Token.is_whitespace(parser_helper.last_token_before_first_token):
|
||||||
|
self.unrecognized_tokens.pop(TokenKind.WHITESPACE)
|
||||||
|
|
||||||
# First, try to recognize the tokens that are waiting
|
# First, try to recognize the tokens that are waiting
|
||||||
self.manage_unrecognized()
|
self.manage_unrecognized()
|
||||||
for forked in self.forked:
|
for forked in self.forked:
|
||||||
# manage the fact that some clone may have been forked
|
# manage the fact that some clone may have been forked
|
||||||
forked.eat_concept(sya_concept_def, pos)
|
forked.eat_concept(sya_concept_def, token, pos)
|
||||||
|
|
||||||
# then, check if this new concept is linked to the previous ones
|
# then, check if this new concept is linked to the previous ones
|
||||||
# ie, is the previous concept fully matched ?
|
# ie, is the previous concept fully matched ?
|
||||||
if temp_concept_node.expected_parameters_before_first_token == 0:
|
if parser_helper.expected_parameters_before_first_token == 0:
|
||||||
# => does not expect pending parameter (it's suffixed concept)
|
# => does not expect pending parameter (it's suffixed concept)
|
||||||
while self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1].potential_pos != -1:
|
while self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1].potential_pos != -1:
|
||||||
# => previous seems to have everything it needs in the parameter list
|
# => previous seems to have everything it needs in the parameter list
|
||||||
self.pop_stack_to_out()
|
self.pop_stack_to_out()
|
||||||
|
|
||||||
if temp_concept_node.is_atom():
|
if parser_helper.is_atom():
|
||||||
self._put_to_out(temp_concept_node.fix_concept())
|
self._put_to_out(parser_helper.fix_concept())
|
||||||
else:
|
else:
|
||||||
# call shunting yard algorithm
|
# call shunting yard algorithm
|
||||||
while self.i_can_pop(temp_concept_node):
|
while self.i_can_pop(parser_helper):
|
||||||
self.pop_stack_to_out()
|
self.pop_stack_to_out()
|
||||||
|
|
||||||
if temp_concept_node.is_matched():
|
if parser_helper.is_matched():
|
||||||
# case of a prefix concept which has found happiness with self.parameters_list
|
# case of a prefix concept which has found happiness with self.parameters_list
|
||||||
# directly put it in out
|
# directly put it in out
|
||||||
self.manage_parameters_when_new_concept(temp_concept_node)
|
self.manage_parameters_when_new_concept(parser_helper)
|
||||||
self._put_to_out(temp_concept_node.fix_concept())
|
self._put_to_out(parser_helper.fix_concept())
|
||||||
else:
|
else:
|
||||||
self.stack.append(temp_concept_node)
|
self.stack.append(parser_helper)
|
||||||
self.manage_parameters_when_new_concept(temp_concept_node)
|
self.manage_parameters_when_new_concept(parser_helper)
|
||||||
|
|
||||||
def eat_unrecognized(self, token, pos):
|
def eat_unrecognized(self, token, pos):
|
||||||
"""
|
"""
|
||||||
@@ -762,17 +856,33 @@ class InFixToPostFix:
|
|||||||
if len(self.stack) == 0 and len(self.out) == 0:
|
if len(self.stack) == 0 and len(self.out) == 0:
|
||||||
return # no need to pop the buffer, as no concept is found
|
return # no need to pop the buffer, as no concept is found
|
||||||
|
|
||||||
|
while len(self.stack) > 0:
|
||||||
|
parser_helper = self.stack[-1]
|
||||||
|
|
||||||
|
# validate parenthesis
|
||||||
|
if self._is_lpar(parser_helper) or self._is_rpar(parser_helper):
|
||||||
|
self._add_error(ParenthesisMismatchErrorNode(parser_helper))
|
||||||
|
return None
|
||||||
|
|
||||||
self.manage_unrecognized()
|
self.manage_unrecognized()
|
||||||
for forked in self.forked:
|
for forked in self.forked:
|
||||||
# manage that some clones may have been forked
|
# manage that some clones may have been forked
|
||||||
forked.finalize()
|
forked.finalize()
|
||||||
|
|
||||||
while len(self.stack) > 0:
|
failed_to_match = sum(map(lambda e: e.type != TokenKind.VAR_DEF, parser_helper.expected))
|
||||||
if self._is_lpar(self.stack[-1]) or self._is_rpar(self.stack[-1]):
|
if failed_to_match > 0:
|
||||||
self._add_error(ParenthesisMismatchErrorNode(self.stack[-1]))
|
# didn't manage to read all tokens.
|
||||||
return None
|
# Transform them into unrecognized
|
||||||
|
self._transform_to_unrecognized(parser_helper)
|
||||||
|
self.false_positives.append(parser_helper)
|
||||||
|
self.stack.pop() # discard the parser helper
|
||||||
|
else:
|
||||||
|
self.pop_stack_to_out() # process it
|
||||||
|
|
||||||
self.pop_stack_to_out()
|
self.manage_unrecognized()
|
||||||
|
for forked in self.forked:
|
||||||
|
# manage that some clones may have been forked
|
||||||
|
forked.finalize()
|
||||||
|
|
||||||
def clone(self):
|
def clone(self):
|
||||||
clone = InFixToPostFix(self.context)
|
clone = InFixToPostFix(self.context)
|
||||||
@@ -975,7 +1085,7 @@ class SyaNodeParser(BaseNodeParser):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
if token.type in (TokenKind.LPAR, TokenKind.RPAR):
|
if token.type in (TokenKind.LPAR, TokenKind.RPAR):
|
||||||
# little optim, no need to get the concept when parenthesis
|
# little optim, no need to lock, unlock or get the concept when parenthesis
|
||||||
for infix_to_postfix in res:
|
for infix_to_postfix in res:
|
||||||
infix_to_postfix.eat_token(token, self.pos)
|
infix_to_postfix.eat_token(token, self.pos)
|
||||||
continue
|
continue
|
||||||
@@ -992,7 +1102,7 @@ class SyaNodeParser(BaseNodeParser):
|
|||||||
|
|
||||||
if len(concepts) == 1:
|
if len(concepts) == 1:
|
||||||
for infix_to_postfix in res:
|
for infix_to_postfix in res:
|
||||||
infix_to_postfix.eat_concept(concepts[0], self.pos)
|
infix_to_postfix.eat_concept(concepts[0], token, self.pos)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# make the cartesian product
|
# make the cartesian product
|
||||||
@@ -1001,7 +1111,7 @@ class SyaNodeParser(BaseNodeParser):
|
|||||||
for concept in concepts:
|
for concept in concepts:
|
||||||
clone = infix_to_postfix.clone()
|
clone = infix_to_postfix.clone()
|
||||||
temp_res.append(clone)
|
temp_res.append(clone)
|
||||||
clone.eat_concept(concept, self.pos)
|
clone.eat_concept(concept, token, self.pos)
|
||||||
res = temp_res
|
res = temp_res
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
@@ -1100,6 +1210,11 @@ class SyaNodeParser(BaseNodeParser):
|
|||||||
to_insert = item
|
to_insert = item
|
||||||
sequence.insert(0, to_insert)
|
sequence.insert(0, to_insert)
|
||||||
|
|
||||||
|
if has_unrecognized:
|
||||||
|
# Manage some sick cases where a missing parenthesis messes up the order of the sequence
|
||||||
|
# example "foo bar(one plus two"
|
||||||
|
sequence.sort(key=attrgetter("start"))
|
||||||
|
|
||||||
ret.append(
|
ret.append(
|
||||||
self.sheerka.ret(
|
self.sheerka.ret(
|
||||||
self.name,
|
self.name,
|
||||||
|
|||||||
@@ -1,912 +0,0 @@
|
|||||||
# #####################################################################################################
|
|
||||||
# # This implementation of the parser is highly inspired by the arpeggio project (https://github.com/textX/Arpeggio)
|
|
||||||
# # I don't directly use the project, but it helped me figure out
|
|
||||||
# # what to do.
|
|
||||||
# # Dejanović I., Milosavljević G., Vaderna R.:
|
|
||||||
# # Arpeggio: A flexible PEG parser for Python,
|
|
||||||
# # Knowledge-Based Systems, 2016, 95, 71 - 74, doi:10.1016/j.knosys.2015.12.004
|
|
||||||
# #####################################################################################################
|
|
||||||
# from collections import namedtuple
|
|
||||||
# from dataclasses import dataclass
|
|
||||||
# from collections import defaultdict
|
|
||||||
# from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
|
|
||||||
# from core.concept import Concept, ConceptParts, DoNotResolve
|
|
||||||
# from core.tokenizer import TokenKind, Tokenizer, Token
|
|
||||||
# from parsers.BaseNodeParser import LexerNode, GrammarErrorNode, ConceptNode, UnrecognizedTokensNode
|
|
||||||
# from parsers.BaseParser import BaseParser, ErrorNode
|
|
||||||
# import core.utils
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# class NonTerminalNode(LexerNode):
|
|
||||||
# """
|
|
||||||
# Returned by the BnfNodeParser
|
|
||||||
# """
|
|
||||||
#
|
|
||||||
# def __init__(self, parsing_expression, start, end, tokens, children=None):
|
|
||||||
# super().__init__(start, end, tokens)
|
|
||||||
# self.parsing_expression = parsing_expression
|
|
||||||
# self.children = children
|
|
||||||
#
|
|
||||||
# def __repr__(self):
|
|
||||||
# name = self.parsing_expression.rule_name or self.parsing_expression.__class__.__name__
|
|
||||||
# if len(self.children) > 0:
|
|
||||||
# sub_names = "(" + ",".join([repr(child) for child in self.children]) + ")"
|
|
||||||
# else:
|
|
||||||
# sub_names = ""
|
|
||||||
# return name + sub_names
|
|
||||||
#
|
|
||||||
# def __eq__(self, other):
|
|
||||||
# if not isinstance(other, NonTerminalNode):
|
|
||||||
# return False
|
|
||||||
#
|
|
||||||
# return self.parsing_expression == other.parsing_expression and \
|
|
||||||
# self.start == other.start and \
|
|
||||||
# self.end == other.end and \
|
|
||||||
# self.children == other.children
|
|
||||||
#
|
|
||||||
# def __hash__(self):
|
|
||||||
# return hash((self.parsing_expression, self.start, self.end, self.children))
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# class TerminalNode(LexerNode):
|
|
||||||
# """
|
|
||||||
# Returned by the BnfNodeParser
|
|
||||||
# """
|
|
||||||
#
|
|
||||||
# def __init__(self, parsing_expression, start, end, value):
|
|
||||||
# super().__init__(start, end, source=value)
|
|
||||||
# self.parsing_expression = parsing_expression
|
|
||||||
# self.value = value
|
|
||||||
#
|
|
||||||
# def __repr__(self):
|
|
||||||
# name = self.parsing_expression.rule_name or ""
|
|
||||||
# return name + f"'{self.value}'"
|
|
||||||
#
|
|
||||||
# def __eq__(self, other):
|
|
||||||
# if not isinstance(other, TerminalNode):
|
|
||||||
# return False
|
|
||||||
#
|
|
||||||
# return self.parsing_expression == other.parsing_expression and \
|
|
||||||
# self.start == other.start and \
|
|
||||||
# self.end == other.end and \
|
|
||||||
# self.value == other.value
|
|
||||||
#
|
|
||||||
# def __hash__(self):
|
|
||||||
# return hash((self.parsing_expression, self.start, self.end, self.value))
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# @dataclass()
|
|
||||||
# class UnknownConceptNode(ErrorNode):
|
|
||||||
# concept_key: str
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# @dataclass()
|
|
||||||
# class TooManyConceptNode(ErrorNode):
|
|
||||||
# concept_key: str
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# class ParsingExpression:
|
|
||||||
# def __init__(self, *args, **kwargs):
|
|
||||||
# self.elements = args
|
|
||||||
#
|
|
||||||
# nodes = kwargs.get('nodes', [])
|
|
||||||
# if not hasattr(nodes, '__iter__'):
|
|
||||||
# nodes = [nodes]
|
|
||||||
# self.nodes = nodes
|
|
||||||
#
|
|
||||||
# self.rule_name = kwargs.get('rule_name', '')
|
|
||||||
#
|
|
||||||
# def __eq__(self, other):
|
|
||||||
# if not isinstance(other, ParsingExpression):
|
|
||||||
# return False
|
|
||||||
#
|
|
||||||
# return self.rule_name == other.rule_name and self.elements == other.elements
|
|
||||||
#
|
|
||||||
# def __hash__(self):
|
|
||||||
# return hash((self.rule_name, self.elements))
|
|
||||||
#
|
|
||||||
# def parse(self, parser):
|
|
||||||
# return self._parse(parser)
|
|
||||||
#
|
|
||||||
# def add_rule_name_if_needed(self, text):
|
|
||||||
# return text + "=" + self.rule_name if self.rule_name else text
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# class ConceptExpression(ParsingExpression):
|
|
||||||
# """
|
|
||||||
# Will match a concept
|
|
||||||
# It used only for rule definition
|
|
||||||
#
|
|
||||||
# When the grammar is created, it is replaced by the actual concept
|
|
||||||
# """
|
|
||||||
#
|
|
||||||
# def __init__(self, concept, rule_name=""):
|
|
||||||
# super().__init__(rule_name=rule_name)
|
|
||||||
# self.concept = concept
|
|
||||||
#
|
|
||||||
# def __repr__(self):
|
|
||||||
# return self.add_rule_name_if_needed(f"{self.concept}")
|
|
||||||
#
|
|
||||||
# def __eq__(self, other):
|
|
||||||
# if not super().__eq__(other):
|
|
||||||
# return False
|
|
||||||
#
|
|
||||||
# if not isinstance(other, ConceptExpression):
|
|
||||||
# return False
|
|
||||||
#
|
|
||||||
# if isinstance(self.concept, Concept):
|
|
||||||
# return self.concept.name == other.concept.name
|
|
||||||
#
|
|
||||||
# # when it's only the name of the concept
|
|
||||||
# return self.concept == other.concept
|
|
||||||
#
|
|
||||||
# def __hash__(self):
|
|
||||||
# return hash((self.concept, self.rule_name))
|
|
||||||
#
|
|
||||||
# @staticmethod
|
|
||||||
# def get_parsing_expression_from_name(name):
|
|
||||||
# tokens = Tokenizer(name)
|
|
||||||
# nodes = [StrMatch(core.utils.strip_quotes(token.value)) for token in list(tokens)[:-1]]
|
|
||||||
# if len(nodes) == 1:
|
|
||||||
# return nodes[0]
|
|
||||||
# else:
|
|
||||||
# sequence = Sequence(nodes)
|
|
||||||
# sequence.nodes = nodes
|
|
||||||
# return sequence
|
|
||||||
#
|
|
||||||
# def _parse(self, parser):
|
|
||||||
# to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept
|
|
||||||
# if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
|
|
||||||
# return None
|
|
||||||
#
|
|
||||||
# self.concept = to_match # Memoize
|
|
||||||
#
|
|
||||||
# if to_match not in parser.concepts_grammars:
|
|
||||||
# # Try to match the concept using its name
|
|
||||||
# expr = self.get_parsing_expression_from_name(to_match.name)
|
|
||||||
# node = expr.parse(parser)
|
|
||||||
# else:
|
|
||||||
# node = parser.concepts_grammars[to_match].parse(parser)
|
|
||||||
#
|
|
||||||
# if node is None:
|
|
||||||
# return None
|
|
||||||
#
|
|
||||||
# return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node])
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# class ConceptGroupExpression(ConceptExpression):
|
|
||||||
# def _parse(self, parser):
|
|
||||||
# to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept
|
|
||||||
# if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
|
|
||||||
# return None
|
|
||||||
#
|
|
||||||
# self.concept = to_match # Memoize
|
|
||||||
#
|
|
||||||
# if to_match not in parser.concepts_grammars:
|
|
||||||
# concepts_in_group = parser.sheerka.get_set_elements(parser.context, self.concept)
|
|
||||||
# nodes = [ConceptExpression(c, rule_name=c.name) for c in concepts_in_group]
|
|
||||||
# expr = OrderedChoice(nodes)
|
|
||||||
# expr.nodes = nodes
|
|
||||||
# node = expr.parse(parser)
|
|
||||||
# else:
|
|
||||||
# node = parser.concepts_grammars[to_match].parse(parser)
|
|
||||||
#
|
|
||||||
# if node is None:
|
|
||||||
# return None
|
|
||||||
#
|
|
||||||
# return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node])
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# class Sequence(ParsingExpression):
|
|
||||||
# """
|
|
||||||
# Will match sequence of parser expressions in exact order they are defined.
|
|
||||||
# """
|
|
||||||
#
|
|
||||||
# def _parse(self, parser):
|
|
||||||
# init_pos = parser.pos
|
|
||||||
# end_pos = parser.pos
|
|
||||||
#
|
|
||||||
# children = []
|
|
||||||
# for e in self.nodes:
|
|
||||||
# node = e.parse(parser)
|
|
||||||
# if node is None:
|
|
||||||
# return None
|
|
||||||
# else:
|
|
||||||
# if node.end != -1: # because returns -1 when no match
|
|
||||||
# children.append(node)
|
|
||||||
# end_pos = node.end
|
|
||||||
#
|
|
||||||
# return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)
|
|
||||||
#
|
|
||||||
# def __repr__(self):
|
|
||||||
# to_str = ", ".join(repr(n) for n in self.elements)
|
|
||||||
# return self.add_rule_name_if_needed(f"({to_str})")
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# class OrderedChoice(ParsingExpression):
|
|
||||||
# """
|
|
||||||
# Will match one among multiple
|
|
||||||
# It will stop at the first match (so the order of definition is important)
|
|
||||||
# """
|
|
||||||
#
|
|
||||||
# def _parse(self, parser):
|
|
||||||
# init_pos = parser.pos
|
|
||||||
#
|
|
||||||
# for e in self.nodes:
|
|
||||||
# node = e.parse(parser)
|
|
||||||
# if node:
|
|
||||||
# return NonTerminalNode(self, init_pos, node.end, parser.tokens[init_pos: node.end + 1], [node])
|
|
||||||
#
|
|
||||||
# parser.seek(init_pos) # backtrack
|
|
||||||
#
|
|
||||||
# return None
|
|
||||||
#
|
|
||||||
# def __repr__(self):
|
|
||||||
# to_str = "| ".join(repr(n) for n in self.elements)
|
|
||||||
# return self.add_rule_name_if_needed(f"({to_str})")
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# class Optional(ParsingExpression):
|
|
||||||
# """
|
|
||||||
# Will match or not the elements
|
|
||||||
# if many matches, will choose longest one
|
|
||||||
# If you need order, use Optional(OrderedChoice)
|
|
||||||
# """
|
|
||||||
#
|
|
||||||
# def _parse(self, parser):
|
|
||||||
# init_pos = parser.pos
|
|
||||||
# selected_node = NonTerminalNode(self, parser.pos, -1, [], []) # means that nothing is found
|
|
||||||
#
|
|
||||||
# for e in self.nodes:
|
|
||||||
# node = e.parse(parser)
|
|
||||||
# if node:
|
|
||||||
# if node.end > selected_node.end:
|
|
||||||
# selected_node = NonTerminalNode(
|
|
||||||
# self,
|
|
||||||
# node.start,
|
|
||||||
# node.end,
|
|
||||||
# parser.tokens[node.start: node.end + 1],
|
|
||||||
# [node])
|
|
||||||
#
|
|
||||||
# parser.seek(init_pos) # backtrack
|
|
||||||
#
|
|
||||||
# if selected_node.end != -1:
|
|
||||||
# parser.seek(selected_node.end)
|
|
||||||
# parser.next_token() # eat the tokens found
|
|
||||||
#
|
|
||||||
# return selected_node
|
|
||||||
#
|
|
||||||
# def __repr__(self):
|
|
||||||
# if len(self.elements) == 1:
|
|
||||||
# return f"{self.elements[0]}?"
|
|
||||||
# else:
|
|
||||||
# to_str = ", ".join(repr(n) for n in self.elements)
|
|
||||||
# return self.add_rule_name_if_needed(f"({to_str})?")
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# class Repetition(ParsingExpression):
|
|
||||||
# """
|
|
||||||
# Base class for all repetition-like parser expressions (?,*,+)
|
|
||||||
# Args:
|
|
||||||
# eolterm(bool): Flag that indicates that end of line should
|
|
||||||
# terminate repetition match.
|
|
||||||
# """
|
|
||||||
#
|
|
||||||
# def __init__(self, *elements, **kwargs):
|
|
||||||
# super(Repetition, self).__init__(*elements, **kwargs)
|
|
||||||
# self.sep = kwargs.get('sep', None)
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# class ZeroOrMore(Repetition):
|
|
||||||
# """
|
|
||||||
# ZeroOrMore will try to match parser expression specified zero or more
|
|
||||||
# times. It will never fail.
|
|
||||||
# """
|
|
||||||
#
|
|
||||||
# def _parse(self, parser):
|
|
||||||
# init_pos = parser.pos
|
|
||||||
# end_pos = -1
|
|
||||||
# children = []
|
|
||||||
#
|
|
||||||
# while True:
|
|
||||||
# current_pos = parser.pos
|
|
||||||
#
|
|
||||||
# # maybe eat the separator if needed
|
|
||||||
# if self.sep and children:
|
|
||||||
# sep_result = self.sep.parse(parser)
|
|
||||||
# if sep_result is None:
|
|
||||||
# parser.seek(current_pos)
|
|
||||||
# break
|
|
||||||
#
|
|
||||||
# # eat the ZeroOrMore
|
|
||||||
# node = self.nodes[0].parse(parser)
|
|
||||||
# if node is None:
|
|
||||||
# parser.seek(current_pos)
|
|
||||||
# break
|
|
||||||
# else:
|
|
||||||
# if node.end != -1: # because returns -1 when no match
|
|
||||||
# children.append(node)
|
|
||||||
# end_pos = node.end
|
|
||||||
#
|
|
||||||
# if len(children) == 0:
|
|
||||||
# return NonTerminalNode(self, init_pos, -1, [], [])
|
|
||||||
#
|
|
||||||
# return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)
|
|
||||||
#
|
|
||||||
# def __repr__(self):
|
|
||||||
# to_str = ", ".join(repr(n) for n in self.elements)
|
|
||||||
# return self.add_rule_name_if_needed(f"({to_str})*")
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# class OneOrMore(Repetition):
|
|
||||||
# """
|
|
||||||
# OneOrMore will try to match parser expression specified one or more times.
|
|
||||||
# """
|
|
||||||
#
|
|
||||||
# def _parse(self, parser):
|
|
||||||
# init_pos = parser.pos
|
|
||||||
# end_pos = -1
|
|
||||||
# children = []
|
|
||||||
#
|
|
||||||
# while True:
|
|
||||||
# current_pos = parser.pos
|
|
||||||
#
|
|
||||||
# # maybe eat the separator if needed
|
|
||||||
# if self.sep and children:
|
|
||||||
# sep_result = self.sep.parse(parser)
|
|
||||||
# if sep_result is None:
|
|
||||||
# parser.seek(current_pos)
|
|
||||||
# break
|
|
||||||
#
|
|
||||||
# # eat the ZeroOrMore
|
|
||||||
# node = self.nodes[0].parse(parser)
|
|
||||||
# if node is None:
|
|
||||||
# parser.seek(current_pos)
|
|
||||||
# break
|
|
||||||
# else:
|
|
||||||
# if node.end != -1: # because returns -1 when no match
|
|
||||||
# children.append(node)
|
|
||||||
# end_pos = node.end
|
|
||||||
#
|
|
||||||
# if len(children) == 0: # if nothing is found, it's an error
|
|
||||||
# return None
|
|
||||||
#
|
|
||||||
# return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)
|
|
||||||
#
|
|
||||||
# def __repr__(self):
|
|
||||||
# to_str = ", ".join(repr(n) for n in self.elements)
|
|
||||||
# return self.add_rule_name_if_needed(f"({to_str})+")
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# class UnorderedGroup(Repetition):
|
|
||||||
# """
|
|
||||||
# Will try to match all of the parsing expression in any order.
|
|
||||||
# """
|
|
||||||
#
|
|
||||||
# def _parse(self, parser):
|
|
||||||
# raise NotImplementedError()
|
|
||||||
#
|
|
||||||
# # def __repr__(self):
|
|
||||||
# # to_str = ", ".join(repr(n) for n in self.elements)
|
|
||||||
# # return f"({to_str})#"
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# class Match(ParsingExpression):
|
|
||||||
# """
|
|
||||||
# Base class for all classes that will try to match something from the input.
|
|
||||||
# """
|
|
||||||
#
|
|
||||||
# def __init__(self, rule_name, root=False):
|
|
||||||
# super(Match, self).__init__(rule_name=rule_name, root=root)
|
|
||||||
#
|
|
||||||
# def parse(self, parser):
|
|
||||||
# result = self._parse(parser)
|
|
||||||
# return result
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# class StrMatch(Match):
|
|
||||||
# """
|
|
||||||
# Matches a literal
|
|
||||||
# """
|
|
||||||
#
|
|
||||||
# def __init__(self, to_match, rule_name="", ignore_case=True):
|
|
||||||
# super(Match, self).__init__(rule_name=rule_name)
|
|
||||||
# self.to_match = to_match
|
|
||||||
# self.ignore_case = ignore_case
|
|
||||||
#
|
|
||||||
# def __repr__(self):
|
|
||||||
# return self.add_rule_name_if_needed(f"'{self.to_match}'")
|
|
||||||
#
|
|
||||||
# def __eq__(self, other):
|
|
||||||
# if not super().__eq__(other):
|
|
||||||
# return False
|
|
||||||
#
|
|
||||||
# if not isinstance(other, StrMatch):
|
|
||||||
# return False
|
|
||||||
#
|
|
||||||
# return self.to_match == other.to_match and self.ignore_case == other.ignore_case
|
|
||||||
#
|
|
||||||
# def _parse(self, parser):
|
|
||||||
# token = parser.get_token()
|
|
||||||
# m = str(token.value).lower() == self.to_match.lower() if self.ignore_case \
|
|
||||||
# else token.value == self.to_match
|
|
||||||
#
|
|
||||||
# if m:
|
|
||||||
# node = TerminalNode(self, parser.pos, parser.pos, token.value)
|
|
||||||
# parser.next_token()
|
|
||||||
# return node
|
|
||||||
#
|
|
||||||
# return None
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# class BnfNodeParser(BaseParser):
|
|
||||||
# def __init__(self, **kwargs):
|
|
||||||
# super().__init__("BnfNode_old", 50)
|
|
||||||
# self.enabled = False
|
|
||||||
# if 'grammars' in kwargs:
|
|
||||||
# self.concepts_grammars = kwargs.get("grammars")
|
|
||||||
# elif 'sheerka' in kwargs:
|
|
||||||
# self.concepts_grammars = kwargs.get("sheerka").concepts_grammars
|
|
||||||
# else:
|
|
||||||
# self.concepts_grammars = {}
|
|
||||||
#
|
|
||||||
# self.ignore_case = True
|
|
||||||
#
|
|
||||||
# self.token = None
|
|
||||||
# self.pos = -1
|
|
||||||
# self.tokens = None
|
|
||||||
#
|
|
||||||
# self.context = None
|
|
||||||
# self.text = None
|
|
||||||
# self.sheerka = None
|
|
||||||
#
|
|
||||||
# def add_error(self, error, next_token=True):
|
|
||||||
# self.error_sink.append(error)
|
|
||||||
# if next_token:
|
|
||||||
# self.next_token()
|
|
||||||
# return error
|
|
||||||
#
|
|
||||||
# def reset_parser(self, context, text):
|
|
||||||
# self.context = context
|
|
||||||
# self.sheerka = context.sheerka
|
|
||||||
# self.text = text
|
|
||||||
#
|
|
||||||
# try:
|
|
||||||
# self.tokens = list(self.get_input_as_tokens(text))
|
|
||||||
# except core.tokenizer.LexerError as e:
|
|
||||||
# self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
|
|
||||||
# return False
|
|
||||||
#
|
|
||||||
# self.token = None
|
|
||||||
# self.pos = -1
|
|
||||||
# self.next_token(False)
|
|
||||||
# return True
|
|
||||||
#
|
|
||||||
# def get_token(self) -> Token:
|
|
||||||
# return self.token
|
|
||||||
#
|
|
||||||
# def next_token(self, skip_whitespace=True):
|
|
||||||
# if self.token and self.token.type == TokenKind.EOF:
|
|
||||||
# return False
|
|
||||||
#
|
|
||||||
# self.pos += 1
|
|
||||||
# self.token = self.tokens[self.pos]
|
|
||||||
#
|
|
||||||
# if skip_whitespace:
|
|
||||||
# while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
|
|
||||||
# self.pos += 1
|
|
||||||
# self.token = self.tokens[self.pos]
|
|
||||||
#
|
|
||||||
# return self.token.type != TokenKind.EOF
|
|
||||||
#
|
|
||||||
# def seek(self, pos):
|
|
||||||
# self.pos = pos
|
|
||||||
# self.token = self.tokens[self.pos]
|
|
||||||
# return True
|
|
||||||
#
|
|
||||||
# def rewind(self, offset, skip_whitespace=True):
|
|
||||||
# self.pos += offset
|
|
||||||
# self.token = self.tokens[self.pos]
|
|
||||||
#
|
|
||||||
# if skip_whitespace:
|
|
||||||
# while self.pos > 0 and (self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE):
|
|
||||||
# self.pos -= 1
|
|
||||||
# self.token = self.tokens[self.pos]
|
|
||||||
#
|
|
||||||
# def initialize(self, context, concepts_definitions):
|
|
||||||
# """
|
|
||||||
# Adds a bunch of concepts, and how they can be recognized
|
|
||||||
# :param context: execution context
|
|
||||||
# :param concepts_definitions: dictionary of concept, concept_definition
|
|
||||||
# :return:
|
|
||||||
# """
|
|
||||||
#
|
|
||||||
# self.context = context
|
|
||||||
# self.sheerka = context.sheerka
|
|
||||||
# concepts_to_resolve = set()
|
|
||||||
#
|
|
||||||
# for concept, concept_def in concepts_definitions.items():
|
|
||||||
# # ## Gets the grammars
|
|
||||||
# context.log(f"Resolving grammar for '{concept}'", context.who)
|
|
||||||
# concept.init_key() # make sure that the key is initialized
|
|
||||||
# grammar = self.get_model(concept_def, concepts_to_resolve)
|
|
||||||
# self.concepts_grammars[concept] = grammar
|
|
||||||
#
|
|
||||||
# if self.has_error:
|
|
||||||
# return self.sheerka.ret(self.name, False, self.error_sink)
|
|
||||||
#
|
|
||||||
# # ## Removes concepts with infinite recursions
|
|
||||||
# concepts_to_remove = self.detect_infinite_recursion(concepts_to_resolve)
|
|
||||||
# for concept in concepts_to_remove:
|
|
||||||
# concepts_to_resolve.remove(concept)
|
|
||||||
# del self.concepts_grammars[concept]
|
|
||||||
#
|
|
||||||
# if self.has_error:
|
|
||||||
# return self.sheerka.ret(self.name, False, self.error_sink)
|
|
||||||
# else:
|
|
||||||
# return self.sheerka.ret(self.name, True, self.concepts_grammars)
|
|
||||||
#
|
|
||||||
# def get_concept(self, concept_name):
|
|
||||||
# if concept_name in self.context.concepts:
|
|
||||||
# return self.context.concepts[concept_name]
|
|
||||||
# return self.sheerka.get_by_key(concept_name)
|
|
||||||
#
|
|
||||||
# def get_model(self, concept_def, concepts_to_resolve):
|
|
||||||
#
|
|
||||||
# # TODO
|
|
||||||
# # inner_get_model must not modify the initial ParsingExpression
|
|
||||||
# # A copy must be created
|
|
||||||
# def inner_get_model(expression):
|
|
||||||
# if isinstance(expression, Concept):
|
|
||||||
# if self.sheerka.isaset(self.context, expression):
|
|
||||||
# ret = ConceptGroupExpression(expression, rule_name=expression.name)
|
|
||||||
# else:
|
|
||||||
# ret = ConceptExpression(expression, rule_name=expression.name)
|
|
||||||
# concepts_to_resolve.add(expression)
|
|
||||||
# elif isinstance(expression, ConceptExpression): # it includes ConceptGroupExpression
|
|
||||||
# if expression.rule_name is None or expression.rule_name == "":
|
|
||||||
# expression.rule_name = expression.concept.name if isinstance(expression.concept, Concept) \
|
|
||||||
# else expression.concept
|
|
||||||
# if isinstance(expression.concept, str):
|
|
||||||
# concept = self.get_concept(expression.concept)
|
|
||||||
# if self.sheerka.is_known(concept):
|
|
||||||
# expression.concept = concept
|
|
||||||
# concepts_to_resolve.add(expression.concept)
|
|
||||||
# ret = expression
|
|
||||||
# elif isinstance(expression, str):
|
|
||||||
# ret = StrMatch(expression, ignore_case=self.ignore_case)
|
|
||||||
# elif isinstance(expression, StrMatch):
|
|
||||||
# ret = expression
|
|
||||||
# if ret.ignore_case is None:
|
|
||||||
# ret.ignore_case = self.ignore_case
|
|
||||||
# elif isinstance(expression, Sequence) or \
|
|
||||||
# isinstance(expression, OrderedChoice) or \
|
|
||||||
# isinstance(expression, ZeroOrMore) or \
|
|
||||||
# isinstance(expression, OneOrMore) or \
|
|
||||||
# isinstance(expression, Optional):
|
|
||||||
# ret = expression
|
|
||||||
# ret.nodes = [inner_get_model(e) for e in ret.elements]
|
|
||||||
# else:
|
|
||||||
# ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False)
|
|
||||||
#
|
|
||||||
# # Translate separator expression.
|
|
||||||
# if isinstance(expression, Repetition) and expression.sep:
|
|
||||||
# expression.sep = inner_get_model(expression.sep)
|
|
||||||
#
|
|
||||||
# return ret
|
|
||||||
#
|
|
||||||
# model = inner_get_model(concept_def)
|
|
||||||
#
|
|
||||||
# return model
|
|
||||||
#
|
|
||||||
# def detect_infinite_recursion(self, concepts_to_resolve):
|
|
||||||
#
|
|
||||||
# # infinite recursion matcher
|
|
||||||
# def _is_infinite_recursion(ref_concept, node):
|
|
||||||
# if isinstance(node, ConceptExpression):
|
|
||||||
# if node.concept == ref_concept:
|
|
||||||
# return True
|
|
||||||
#
|
|
||||||
# if isinstance(node.concept, str):
|
|
||||||
# to_match = self.get_concept(node.concept)
|
|
||||||
# if self.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
|
|
||||||
# return False
|
|
||||||
# else:
|
|
||||||
# to_match = node.concept
|
|
||||||
#
|
|
||||||
# if to_match not in self.concepts_grammars:
|
|
||||||
# return False
|
|
||||||
#
|
|
||||||
# return _is_infinite_recursion(ref_concept, self.concepts_grammars[to_match])
|
|
||||||
#
|
|
||||||
# if isinstance(node, OrderedChoice):
|
|
||||||
# return _is_infinite_recursion(ref_concept, node.nodes[0])
|
|
||||||
#
|
|
||||||
# if isinstance(node, Sequence):
|
|
||||||
# for node in node.nodes:
|
|
||||||
# if _is_infinite_recursion(ref_concept, node):
|
|
||||||
# return True
|
|
||||||
# return False
|
|
||||||
#
|
|
||||||
# return False
|
|
||||||
#
|
|
||||||
# removed_concepts = []
|
|
||||||
# for e in concepts_to_resolve:
|
|
||||||
# if isinstance(e, str):
|
|
||||||
# e = self.get_concept(e)
|
|
||||||
# if self.sheerka.isinstance(e, BuiltinConcepts.UNKNOWN_CONCEPT):
|
|
||||||
# continue
|
|
||||||
#
|
|
||||||
# if e not in self.concepts_grammars:
|
|
||||||
# continue
|
|
||||||
#
|
|
||||||
# to_resolve = self.concepts_grammars[e]
|
|
||||||
# if _is_infinite_recursion(e, to_resolve):
|
|
||||||
# removed_concepts.append(e)
|
|
||||||
# return removed_concepts
|
|
||||||
#
|
|
||||||
# def parse(self, context, parser_input):
|
|
||||||
# if parser_input == "":
|
|
||||||
# return context.sheerka.ret(
|
|
||||||
# self.name,
|
|
||||||
# False,
|
|
||||||
# context.sheerka.new(BuiltinConcepts.IS_EMPTY)
|
|
||||||
# )
|
|
||||||
#
|
|
||||||
# if not self.reset_parser(context, parser_input):
|
|
||||||
# return self.sheerka.ret(
|
|
||||||
# self.name,
|
|
||||||
# False,
|
|
||||||
# context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
|
|
||||||
#
|
|
||||||
# concepts_found = [[]]
|
|
||||||
# unrecognized_tokens = None
|
|
||||||
# has_unrecognized = False
|
|
||||||
#
|
|
||||||
# # actually list of list
|
|
||||||
# # The first dimension is the number of possibilities found
|
|
||||||
# # The second dimension is the number of concepts found, under one possibility
|
|
||||||
# #
|
|
||||||
# # Example 1
|
|
||||||
# # concept foo : 'one' 'two'
|
|
||||||
# # concept bar : 'one' 'two'
|
|
||||||
# # input 'one two' -> will produce two possibilities (foo and bar).
|
|
||||||
# #
|
|
||||||
# # Example 2
|
|
||||||
# # concept foo : 'one'
|
|
||||||
# # concept bar : 'two'
|
|
||||||
# # input 'one two' -> will produce one possibility which is (foo, bar) (foo then bar)
|
|
||||||
#
|
|
||||||
# while True:
|
|
||||||
# init_pos = self.pos
|
|
||||||
# res = []
|
|
||||||
#
|
|
||||||
# for concept, grammar in self.concepts_grammars.items():
|
|
||||||
# self.seek(init_pos)
|
|
||||||
# node = grammar.parse(self) # a node is TerminalNode or NonTerminalNode
|
|
||||||
# if node is not None and node.end != -1:
|
|
||||||
# updated_concept = self.finalize_concept(context.sheerka, concept, node)
|
|
||||||
# concept_node = ConceptNode(
|
|
||||||
# updated_concept,
|
|
||||||
# node.start,
|
|
||||||
# node.end,
|
|
||||||
# self.tokens[node.start: node.end + 1],
|
|
||||||
# None,
|
|
||||||
# node)
|
|
||||||
# res.append(concept_node)
|
|
||||||
#
|
|
||||||
# if len(res) == 0: # not recognized
|
|
||||||
# self.seek(init_pos)
|
|
||||||
# if unrecognized_tokens:
|
|
||||||
# unrecognized_tokens.add_token(self.get_token(), init_pos)
|
|
||||||
# else:
|
|
||||||
# unrecognized_tokens = UnrecognizedTokensNode(init_pos, init_pos, [self.get_token()])
|
|
||||||
#
|
|
||||||
# if not self.next_token(False):
|
|
||||||
# break
|
|
||||||
#
|
|
||||||
# else: # some concepts are recognized
|
|
||||||
# if unrecognized_tokens and unrecognized_tokens.not_whitespace():
|
|
||||||
# unrecognized_tokens.fix_source()
|
|
||||||
# concepts_found = core.utils.product(concepts_found, [unrecognized_tokens])
|
|
||||||
# has_unrecognized = True
|
|
||||||
# unrecognized_tokens = None
|
|
||||||
#
|
|
||||||
#                 res = self.get_bests(res)  # only keep the concepts that consume the most tokens
|
|
||||||
# concepts_found = core.utils.product(concepts_found, res)
|
|
||||||
#
|
|
||||||
# # loop
|
|
||||||
# self.seek(res[0].end)
|
|
||||||
# if not self.next_token(False):
|
|
||||||
# break
|
|
||||||
#
|
|
||||||
# # Fix the source for unrecognized tokens
|
|
||||||
# if unrecognized_tokens and unrecognized_tokens.not_whitespace():
|
|
||||||
# unrecognized_tokens.fix_source()
|
|
||||||
# concepts_found = core.utils.product(concepts_found, [unrecognized_tokens])
|
|
||||||
# has_unrecognized = True
|
|
||||||
#
|
|
||||||
# # else
|
|
||||||
#         # returns as many ReturnValues as choices found
|
|
||||||
# ret = []
|
|
||||||
# for choice in concepts_found:
|
|
||||||
# ret.append(
|
|
||||||
# self.sheerka.ret(
|
|
||||||
# self.name,
|
|
||||||
# not has_unrecognized,
|
|
||||||
# self.sheerka.new(
|
|
||||||
# BuiltinConcepts.PARSER_RESULT,
|
|
||||||
# parser=self,
|
|
||||||
# source=parser_input,
|
|
||||||
# body=choice,
|
|
||||||
# try_parsed=choice)))
|
|
||||||
#
|
|
||||||
# if len(ret) == 1:
|
|
||||||
# self.log_result(context, parser_input, ret[0])
|
|
||||||
# return ret[0]
|
|
||||||
# else:
|
|
||||||
# self.log_multiple_results(context, parser_input, ret)
|
|
||||||
# return ret
|
|
||||||
#
|
|
||||||
# def finalize_concept(self, sheerka, template, underlying, init_empty_body=True):
|
|
||||||
# """
|
|
||||||
# Updates the properties of the concept
|
|
||||||
# Goes in recursion if the property is a concept
|
|
||||||
# """
|
|
||||||
#
|
|
||||||
# # this cache is to make sure that we return the same concept for the same ConceptExpression
|
|
||||||
# _underlying_value_cache = {}
|
|
||||||
#
|
|
||||||
# def _add_prop(_concept, prop_name, value):
|
|
||||||
# """
|
|
||||||
# Adds a new entry,
|
|
||||||
# makes a list if the property already exists
|
|
||||||
# """
|
|
||||||
# if prop_name not in _concept.compiled or _concept.compiled[prop_name] is None:
|
|
||||||
# # new entry
|
|
||||||
# _concept.compiled[prop_name] = value
|
|
||||||
# else:
|
|
||||||
# # make a list if there was a value
|
|
||||||
# previous_value = _concept.compiled[prop_name]
|
|
||||||
# if isinstance(previous_value, list):
|
|
||||||
# previous_value.append(value)
|
|
||||||
# else:
|
|
||||||
# new_value = [previous_value, value]
|
|
||||||
# _concept.compiled[prop_name] = new_value
|
|
||||||
#
|
|
||||||
# def _look_for_concept_match(_underlying):
|
|
||||||
# """
|
|
||||||
# At some point, there is either an StrMatch or a ConceptMatch,
|
|
||||||
# that allowed the recognition.
|
|
||||||
# Look for the ConceptMatch, with recursion if needed
|
|
||||||
# """
|
|
||||||
# if isinstance(_underlying.parsing_expression, ConceptExpression):
|
|
||||||
# return _underlying
|
|
||||||
#
|
|
||||||
# if not isinstance(_underlying, NonTerminalNode):
|
|
||||||
# return None
|
|
||||||
#
|
|
||||||
# if len(_underlying.children) != 1:
|
|
||||||
# return None
|
|
||||||
#
|
|
||||||
# return _look_for_concept_match(_underlying.children[0])
|
|
||||||
#
|
|
||||||
# def _get_underlying_value(_underlying):
|
|
||||||
# concept_match_node = _look_for_concept_match(_underlying)
|
|
||||||
# if concept_match_node:
|
|
||||||
# # the value is a concept
|
|
||||||
# if id(concept_match_node) in _underlying_value_cache:
|
|
||||||
# result = _underlying_value_cache[id(concept_match_node)]
|
|
||||||
# else:
|
|
||||||
# ref_tpl = concept_match_node.parsing_expression.concept
|
|
||||||
# result = self.finalize_concept(sheerka, ref_tpl, concept_match_node.children[0], init_empty_body)
|
|
||||||
# _underlying_value_cache[id(concept_match_node)] = result
|
|
||||||
# else:
|
|
||||||
# # the value is a string
|
|
||||||
# result = DoNotResolve(_underlying.source)
|
|
||||||
#
|
|
||||||
# return result
|
|
||||||
#
|
|
||||||
# def _process_rule_name(_concept, _underlying):
|
|
||||||
# if _underlying.parsing_expression.rule_name:
|
|
||||||
# value = _get_underlying_value(_underlying)
|
|
||||||
# _add_prop(_concept, _underlying.parsing_expression.rule_name, value)
|
|
||||||
# _concept.metadata.need_validation = True
|
|
||||||
#
|
|
||||||
# if isinstance(_underlying, NonTerminalNode):
|
|
||||||
# for child in _underlying.children:
|
|
||||||
# _process_rule_name(_concept, child)
|
|
||||||
#
|
|
||||||
# key = (template.key, template.id) if template.id else template.key
|
|
||||||
# concept = sheerka.new(key)
|
|
||||||
# if init_empty_body and concept.metadata.body is None:
|
|
||||||
# value = _get_underlying_value(underlying)
|
|
||||||
# concept.compiled[ConceptParts.BODY] = value
|
|
||||||
# if underlying.parsing_expression.rule_name:
|
|
||||||
# _add_prop(concept, underlying.parsing_expression.rule_name, value)
|
|
||||||
# # KSI : Why don't we set concept.metadata.need_validation to True ?
|
|
||||||
#
|
|
||||||
# if isinstance(underlying, NonTerminalNode):
|
|
||||||
# for node in underlying.children:
|
|
||||||
# _process_rule_name(concept, node)
|
|
||||||
#
|
|
||||||
# return concept
|
|
||||||
#
|
|
||||||
# def encode_grammar(self, grammar):
|
|
||||||
# """
|
|
||||||
#         Transform the grammar into something that can easily be serialized
|
|
||||||
# :param grammar:
|
|
||||||
# :return:
|
|
||||||
# """
|
|
||||||
#
|
|
||||||
# def _encode(expression):
|
|
||||||
# if isinstance(expression, StrMatch):
|
|
||||||
# res = f"'{expression.to_match}'"
|
|
||||||
#
|
|
||||||
# elif isinstance(expression, ConceptExpression):
|
|
||||||
# res = core.utils.str_concept(expression.concept)
|
|
||||||
#
|
|
||||||
# elif isinstance(expression, Sequence):
|
|
||||||
# res = "(" + " ".join(_encode(c) for c in expression.nodes) + ")"
|
|
||||||
#
|
|
||||||
# elif isinstance(expression, OrderedChoice):
|
|
||||||
# res = "(" + "|".join(_encode(c) for c in expression.nodes) + ")"
|
|
||||||
#
|
|
||||||
# elif isinstance(expression, Optional):
|
|
||||||
# res = _encode(expression.nodes[0]) + "?"
|
|
||||||
#
|
|
||||||
# elif isinstance(expression, ZeroOrMore):
|
|
||||||
# res = _encode(expression.nodes[0]) + "*"
|
|
||||||
#
|
|
||||||
# elif isinstance(expression, OneOrMore):
|
|
||||||
# res = _encode(expression.nodes[0]) + "+"
|
|
||||||
#
|
|
||||||
# if expression.rule_name:
|
|
||||||
# res += "=" + expression.rule_name
|
|
||||||
#
|
|
||||||
# return res
|
|
||||||
#
|
|
||||||
# result = {}
|
|
||||||
# for k, v in grammar.items():
|
|
||||||
# key = core.utils.str_concept(k)
|
|
||||||
# value = _encode(v)
|
|
||||||
# result[key] = value
|
|
||||||
# return result
|
|
||||||
#
|
|
||||||
# @staticmethod
|
|
||||||
# def get_bests(results):
|
|
||||||
# """
|
|
||||||
# Returns the result that is the longest
|
|
||||||
# :param results:
|
|
||||||
# :return:
|
|
||||||
# """
|
|
||||||
# by_end_pos = defaultdict(list)
|
|
||||||
# for result in results:
|
|
||||||
# by_end_pos[result.end].append(result)
|
|
||||||
#
|
|
||||||
# return by_end_pos[max(by_end_pos)]
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# class ParsingExpressionVisitor:
|
|
||||||
# """
|
|
||||||
# visit ParsingExpression
|
|
||||||
# """
|
|
||||||
#
|
|
||||||
# def visit(self, parsing_expression):
|
|
||||||
# name = parsing_expression.__class__.__name__
|
|
||||||
#
|
|
||||||
# method = 'visit_' + name
|
|
||||||
# visitor = getattr(self, method, self.generic_visit)
|
|
||||||
# return visitor(parsing_expression)
|
|
||||||
#
|
|
||||||
# def generic_visit(self, parsing_expression):
|
|
||||||
# if hasattr(self, "visit_all"):
|
|
||||||
# self.visit_all(parsing_expression)
|
|
||||||
#
|
|
||||||
# for node in parsing_expression.elements:
|
|
||||||
# if isinstance(node, Concept):
|
|
||||||
# self.visit(ConceptExpression(node.key or node.name))
|
|
||||||
# elif isinstance(node, str):
|
|
||||||
# self.visit(StrMatch(node))
|
|
||||||
# else:
|
|
||||||
# self.visit(node)
|
|
||||||
@@ -1,108 +0,0 @@
|
|||||||
# # try to match something like
|
|
||||||
# # ConceptNode 'plus' ConceptNode
|
|
||||||
# #
|
|
||||||
# # Replaced by SyaNodeParser
|
|
||||||
# from core.builtin_concepts import BuiltinConcepts
|
|
||||||
# from core.tokenizer import TokenKind, Token
|
|
||||||
# from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode
|
|
||||||
# from parsers.BaseParser import BaseParser
|
|
||||||
# from parsers.MultipleConceptsParser import MultipleConceptsParser
|
|
||||||
# from core.concept import VARIABLE_PREFIX
|
|
||||||
#
|
|
||||||
# multiple_concepts_parser = MultipleConceptsParser()
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# class ConceptsWithConceptsParser(BaseParser):
|
|
||||||
# def __init__(self, **kwargs):
|
|
||||||
# super().__init__("ConceptsWithConcepts", 25)
|
|
||||||
# self.enabled = False
|
|
||||||
#
|
|
||||||
# @staticmethod
|
|
||||||
# def get_tokens(nodes):
|
|
||||||
# tokens = []
|
|
||||||
#
|
|
||||||
# for node in nodes:
|
|
||||||
# if isinstance(node, ConceptNode):
|
|
||||||
# index, line, column = node.tokens[0].index, node.tokens[0].line, node.tokens[0].column
|
|
||||||
# tokens.append(Token(TokenKind.CONCEPT, node.concept, index, line, column))
|
|
||||||
# else:
|
|
||||||
# for token in node.tokens:
|
|
||||||
# if token.type == TokenKind.EOF:
|
|
||||||
# break
|
|
||||||
# elif token.type in (TokenKind.NEWLINE, TokenKind.WHITESPACE):
|
|
||||||
# continue
|
|
||||||
# else:
|
|
||||||
# tokens.append(token)
|
|
||||||
#
|
|
||||||
# return tokens
|
|
||||||
#
|
|
||||||
# @staticmethod
|
|
||||||
# def get_key(nodes):
|
|
||||||
# key = ""
|
|
||||||
# index = 0
|
|
||||||
# for node in nodes:
|
|
||||||
# if key:
|
|
||||||
# key += " "
|
|
||||||
#
|
|
||||||
# if isinstance(node, UnrecognizedTokensNode):
|
|
||||||
# key += node.source.strip()
|
|
||||||
# else:
|
|
||||||
# key += f"{VARIABLE_PREFIX}{index}"
|
|
||||||
# index += 1
|
|
||||||
#
|
|
||||||
# return key
|
|
||||||
#
|
|
||||||
# def finalize_concept(self, context, concept, nodes):
|
|
||||||
# index = 0
|
|
||||||
# for node in nodes:
|
|
||||||
#
|
|
||||||
# if isinstance(node, ConceptNode):
|
|
||||||
# prop_name = list(concept.props.keys())[index]
|
|
||||||
# concept.compiled[prop_name] = node.concept
|
|
||||||
# context.log(
|
|
||||||
# f"Setting property '{prop_name}='{node.concept}'.",
|
|
||||||
# self.name)
|
|
||||||
# index += 1
|
|
||||||
# elif isinstance(node, SourceCodeNode):
|
|
||||||
# prop_name = list(concept.props.keys())[index]
|
|
||||||
# sheerka = context.sheerka
|
|
||||||
# value = sheerka.new(BuiltinConcepts.PARSER_RESULT, parser=self, source=node.source, body=node.node)
|
|
||||||
# concept.compiled[prop_name] = [context.sheerka.ret(self.name, True, value)]
|
|
||||||
# context.log(
|
|
||||||
# f"Setting property '{prop_name}'='Python({node.source})'.",
|
|
||||||
# self.name)
|
|
||||||
# index += 1
|
|
||||||
#
|
|
||||||
# return concept
|
|
||||||
#
|
|
||||||
# def parse(self, context, parser_input):
|
|
||||||
# sheerka = context.sheerka
|
|
||||||
# nodes = self.get_input_as_lexer_nodes(parser_input, multiple_concepts_parser)
|
|
||||||
# if not nodes:
|
|
||||||
# return None
|
|
||||||
#
|
|
||||||
# concept_key = self.get_key(nodes)
|
|
||||||
# concept = sheerka.new(concept_key)
|
|
||||||
# if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
|
|
||||||
# return sheerka.ret(
|
|
||||||
# self.name,
|
|
||||||
# False,
|
|
||||||
# sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.body))
|
|
||||||
#
|
|
||||||
# concepts = concept if hasattr(concept, "__iter__") else [concept]
|
|
||||||
# for concept in concepts:
|
|
||||||
# self.finalize_concept(context, concept, nodes)
|
|
||||||
#
|
|
||||||
# res = []
|
|
||||||
# for concept in concepts:
|
|
||||||
# res.append(sheerka.ret(
|
|
||||||
# self.name,
|
|
||||||
# True,
|
|
||||||
# sheerka.new(
|
|
||||||
# BuiltinConcepts.PARSER_RESULT,
|
|
||||||
# parser=self,
|
|
||||||
# source=parser_input.source,
|
|
||||||
# body=concept,
|
|
||||||
# try_parsed=None)))
|
|
||||||
#
|
|
||||||
# return res[0] if len(res) == 1 else res
|
|
||||||
@@ -1,163 +0,0 @@
|
|||||||
# # to be replaced by SyaNodeParser
|
|
||||||
# import ast
|
|
||||||
#
|
|
||||||
# from core.builtin_concepts import BuiltinConcepts
|
|
||||||
# from core.tokenizer import TokenKind
|
|
||||||
# from parsers.BaseNodeParser import SourceCodeNode
|
|
||||||
# from parsers.BaseParser import BaseParser
|
|
||||||
# from parsers.BnfNodeParser import BnfNodeParser, UnrecognizedTokensNode, ConceptNode
|
|
||||||
# import core.utils
|
|
||||||
# from parsers.PythonParser import PythonParser
|
|
||||||
#
|
|
||||||
# concept_lexer_parser = BnfNodeParser()
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# class MultipleConceptsParser(BaseParser):
|
|
||||||
# """
|
|
||||||
# Parser that will take the result of BnfNodeParser and
|
|
||||||
# try to resolve the unrecognized tokens token by token
|
|
||||||
#
|
|
||||||
#     It is a success when it returns a list made exclusively of ConceptNode
|
|
||||||
# """
|
|
||||||
#
|
|
||||||
# def __init__(self, **kwargs):
|
|
||||||
# BaseParser.__init__(self, "MultipleConcepts", 45)
|
|
||||||
# self.enabled = False
|
|
||||||
#
|
|
||||||
# @staticmethod
|
|
||||||
# def finalize(nodes_found, unrecognized_tokens):
|
|
||||||
# if not unrecognized_tokens:
|
|
||||||
# return nodes_found, unrecognized_tokens
|
|
||||||
#
|
|
||||||
# unrecognized_tokens.fix_source()
|
|
||||||
# if unrecognized_tokens.not_whitespace():
|
|
||||||
# nodes_found = core.utils.product(nodes_found, [unrecognized_tokens])
|
|
||||||
#
|
|
||||||
# return nodes_found, None
|
|
||||||
#
|
|
||||||
# @staticmethod
|
|
||||||
# def create_or_add(unrecognized_tokens, token, index):
|
|
||||||
# if unrecognized_tokens:
|
|
||||||
# unrecognized_tokens.add_token(token, index)
|
|
||||||
# else:
|
|
||||||
# unrecognized_tokens = UnrecognizedTokensNode(index, index, [token])
|
|
||||||
# return unrecognized_tokens
|
|
||||||
#
|
|
||||||
# def parse(self, context, parser_input):
|
|
||||||
# sheerka = context.sheerka
|
|
||||||
# nodes = self.get_input_as_lexer_nodes(parser_input, concept_lexer_parser)
|
|
||||||
# if not nodes:
|
|
||||||
# return None
|
|
||||||
#
|
|
||||||
# nodes_found = [[]]
|
|
||||||
# concepts_only = True
|
|
||||||
#
|
|
||||||
# for node in nodes:
|
|
||||||
# if isinstance(node, UnrecognizedTokensNode):
|
|
||||||
# unrecognized_tokens = None
|
|
||||||
# i = 0
|
|
||||||
#
|
|
||||||
# while i < len(node.tokens):
|
|
||||||
#
|
|
||||||
# token_index = node.start + i
|
|
||||||
# token = node.tokens[i]
|
|
||||||
#
|
|
||||||
# concepts_nodes = self.get_concepts_nodes(context, token_index, token)
|
|
||||||
# if concepts_nodes is not None:
|
|
||||||
# nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
|
|
||||||
# nodes_found = core.utils.product(nodes_found, concepts_nodes)
|
|
||||||
# i += 1
|
|
||||||
# continue
|
|
||||||
#
|
|
||||||
# source_code_node = self.get_source_code_node(context, token_index, node.tokens[i:])
|
|
||||||
# if source_code_node:
|
|
||||||
# nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
|
|
||||||
# nodes_found = core.utils.product(nodes_found, [source_code_node])
|
|
||||||
# i += len(source_code_node.tokens)
|
|
||||||
# continue
|
|
||||||
#
|
|
||||||
# # not a concept nor some source code
|
|
||||||
# unrecognized_tokens = self.create_or_add(unrecognized_tokens, token, token_index)
|
|
||||||
# concepts_only &= token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE)
|
|
||||||
# i += 1
|
|
||||||
#
|
|
||||||
# # finish processing if needed
|
|
||||||
# nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
|
|
||||||
#
|
|
||||||
# else:
|
|
||||||
# nodes_found = core.utils.product(nodes_found, [node])
|
|
||||||
#
|
|
||||||
# ret = []
|
|
||||||
# for choice in nodes_found:
|
|
||||||
# ret.append(
|
|
||||||
# sheerka.ret(
|
|
||||||
# self.name,
|
|
||||||
# concepts_only,
|
|
||||||
# sheerka.new(
|
|
||||||
# BuiltinConcepts.PARSER_RESULT,
|
|
||||||
# parser=self,
|
|
||||||
# source=parser_input.source,
|
|
||||||
# body=choice,
|
|
||||||
# try_parsed=None))
|
|
||||||
# )
|
|
||||||
#
|
|
||||||
# if len(ret) == 1:
|
|
||||||
# self.log_result(context, parser_input.source, ret[0])
|
|
||||||
# return ret[0]
|
|
||||||
# else:
|
|
||||||
# self.log_multiple_results(context, parser_input.source, ret)
|
|
||||||
# return ret
|
|
||||||
#
|
|
||||||
# @staticmethod
|
|
||||||
# def get_concepts_nodes(context, index, token):
|
|
||||||
# """
|
|
||||||
# Tries to recognize a concept
|
|
||||||
#         from the universe of all known concepts
|
|
||||||
# """
|
|
||||||
#
|
|
||||||
# if token.type != TokenKind.IDENTIFIER:
|
|
||||||
# return None
|
|
||||||
#
|
|
||||||
# concept = context.new_concept(token.value)
|
|
||||||
# if hasattr(concept, "__iter__") or context.sheerka.is_known(concept):
|
|
||||||
# concepts = concept if hasattr(concept, "__iter__") else [concept]
|
|
||||||
# concepts_nodes = [ConceptNode(c, index, index, [token], token.value) for c in concepts]
|
|
||||||
# return concepts_nodes
|
|
||||||
#
|
|
||||||
# return None
|
|
||||||
#
|
|
||||||
# @staticmethod
|
|
||||||
# def get_source_code_node(context, index, tokens):
|
|
||||||
# """
|
|
||||||
# Tries to recognize source code.
|
|
||||||
# For the time being, only Python is supported
|
|
||||||
# :param context:
|
|
||||||
# :param tokens:
|
|
||||||
# :param index:
|
|
||||||
# :return:
|
|
||||||
# """
|
|
||||||
#
|
|
||||||
# if len(tokens) == 0 or (len(tokens) == 1 and tokens[0].type == TokenKind.EOF):
|
|
||||||
# return None
|
|
||||||
#
|
|
||||||
# end_index = len(tokens)
|
|
||||||
# while end_index > 0:
|
|
||||||
# parser = PythonParser()
|
|
||||||
# tokens_to_parse = tokens[:end_index]
|
|
||||||
# res = parser.parse(context, tokens_to_parse)
|
|
||||||
# if res.status:
|
|
||||||
# # only expression are accepted
|
|
||||||
# ast_ = res.value.value.ast_
|
|
||||||
# if not isinstance(ast_, ast.Expression):
|
|
||||||
# return None
|
|
||||||
# try:
|
|
||||||
# compiled = compile(ast_, "<string>", "eval")
|
|
||||||
# eval(compiled, {}, {})
|
|
||||||
# except Exception:
|
|
||||||
# return None
|
|
||||||
#
|
|
||||||
# source = BaseParser.get_text_from_tokens(tokens_to_parse)
|
|
||||||
# return SourceCodeNode(res.value.value, index, index + end_index - 1, tokens_to_parse, source)
|
|
||||||
# end_index -= 1
|
|
||||||
#
|
|
||||||
# return None
|
|
||||||
@@ -12,10 +12,10 @@ from core.concept import Concept, ConceptParts, DEFINITION_TYPE_DEF
|
|||||||
("foo", ["foo"], "foo"),
|
("foo", ["foo"], "foo"),
|
||||||
("foo a", ["foo"], "__var__0 a"),
|
("foo a", ["foo"], "__var__0 a"),
|
||||||
("foo a b", ["a"], "foo __var__0 b"),
|
("foo a b", ["a"], "foo __var__0 b"),
|
||||||
("'foo'", [], "foo"),
|
("'foo'", [], "'foo'"),
|
||||||
("my name is a", ["a"], "my name is __var__0"),
|
("my name is a", ["a"], "my name is __var__0"),
|
||||||
("a b c d", ["b", "c"], "a __var__0 __var__1 d"),
|
("a b c d", ["b", "c"], "a __var__0 __var__1 d"),
|
||||||
("a 'b c' d", ["b", "c"], "a b c d"),
|
("a 'b c' d", ["b", "c"], "a 'b c' d"),
|
||||||
("a | b", ["a", "b"], "__var__0 | __var__1"),
|
("a | b", ["a", "b"], "__var__0 | __var__1"),
|
||||||
("a b a c", ["a", "b"], "__var__0 __var__1 __var__0 c"),
|
("a b a c", ["a", "b"], "__var__0 __var__1 __var__0 c"),
|
||||||
("a b a c", ["b", "a"], "__var__1 __var__0 __var__1 c"),
|
("a b a c", ["b", "a"], "__var__1 __var__0 __var__1 c"),
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ from core.tokenizer import Tokenizer, Token, TokenKind, LexerError, Keywords
|
|||||||
|
|
||||||
def test_i_can_tokenize():
|
def test_i_can_tokenize():
|
||||||
source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\"=|&<>c:name:"
|
source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\"=|&<>c:name:"
|
||||||
source += "$£€!_identifier°~_^\\`==#"
|
source += "$£€!_identifier°~_^\\`==#__var__10"
|
||||||
tokens = list(Tokenizer(source))
|
tokens = list(Tokenizer(source))
|
||||||
assert tokens[0] == Token(TokenKind.PLUS, "+", 0, 1, 1)
|
assert tokens[0] == Token(TokenKind.PLUS, "+", 0, 1, 1)
|
||||||
assert tokens[1] == Token(TokenKind.STAR, "*", 1, 1, 2)
|
assert tokens[1] == Token(TokenKind.STAR, "*", 1, 1, 2)
|
||||||
@@ -54,8 +54,9 @@ def test_i_can_tokenize():
|
|||||||
assert tokens[45] == Token(TokenKind.BACK_QUOTE, '`', 108, 6, 50)
|
assert tokens[45] == Token(TokenKind.BACK_QUOTE, '`', 108, 6, 50)
|
||||||
assert tokens[46] == Token(TokenKind.EQUALSEQUALS, '==', 109, 6, 51)
|
assert tokens[46] == Token(TokenKind.EQUALSEQUALS, '==', 109, 6, 51)
|
||||||
assert tokens[47] == Token(TokenKind.HASH, '#', 111, 6, 53)
|
assert tokens[47] == Token(TokenKind.HASH, '#', 111, 6, 53)
|
||||||
|
assert tokens[48] == Token(TokenKind.VAR_DEF, '__var__10', 112, 6, 54)
|
||||||
|
|
||||||
assert tokens[48] == Token(TokenKind.EOF, '', 112, 6, 54)
|
assert tokens[49] == Token(TokenKind.EOF, '', 121, 6, 63)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("text, expected", [
|
@pytest.mark.parametrize("text, expected", [
|
||||||
@@ -88,6 +89,19 @@ def test_i_can_parse_word(text):
|
|||||||
assert tokens[1].index == len(text)
|
assert tokens[1].index == len(text)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("text", [
|
||||||
|
"__var__0",
|
||||||
|
"__var__1",
|
||||||
|
"__var__10",
|
||||||
|
"__var__999",
|
||||||
|
])
|
||||||
|
def test_i_can_parse_var_def(text):
|
||||||
|
tokens = list(Tokenizer(text))
|
||||||
|
assert len(tokens) == 2
|
||||||
|
assert tokens[0].type == TokenKind.VAR_DEF
|
||||||
|
assert tokens[0].value == text
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("text, message, error_text, index, line, column", [
|
@pytest.mark.parametrize("text, message, error_text, index, line, column", [
|
||||||
("'string", "Missing Trailing quote", "'string", 7, 1, 8),
|
("'string", "Missing Trailing quote", "'string", 7, 1, 8),
|
||||||
('"string', "Missing Trailing quote", '"string', 7, 1, 8),
|
('"string', "Missing Trailing quote", '"string', 7, 1, 8),
|
||||||
|
|||||||
@@ -36,9 +36,9 @@ def compute_debug_array(res):
|
|||||||
if token.type == TokenKind.WHITESPACE:
|
if token.type == TokenKind.WHITESPACE:
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
res_debug.append(token.value)
|
res_debug.append("T(" + token.value + ")")
|
||||||
else:
|
else:
|
||||||
res_debug.append(token.concept.name)
|
res_debug.append("C(" + token.concept.name + ")")
|
||||||
to_compare.append(res_debug)
|
to_compare.append(res_debug)
|
||||||
|
|
||||||
return to_compare
|
return to_compare
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -1,193 +0,0 @@
|
|||||||
# import ast
|
|
||||||
#
|
|
||||||
# import pytest
|
|
||||||
#
|
|
||||||
# from core.builtin_concepts import ParserResultConcept, ReturnValueConcept, BuiltinConcepts
|
|
||||||
# from core.concept import Concept
|
|
||||||
# from core.tokenizer import Token, TokenKind, Tokenizer
|
|
||||||
# from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode
|
|
||||||
# from parsers.ConceptsWithConceptsParser import ConceptsWithConceptsParser
|
|
||||||
# from parsers.MultipleConceptsParser import MultipleConceptsParser
|
|
||||||
# from parsers.PythonParser import PythonNode
|
|
||||||
#
|
|
||||||
# from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
|
|
||||||
#
|
|
||||||
# multiple_concepts_parser = MultipleConceptsParser()
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# def ret_val(*args):
|
|
||||||
# result = []
|
|
||||||
# index = 0
|
|
||||||
# source = ""
|
|
||||||
# for item in args:
|
|
||||||
# if isinstance(item, Concept):
|
|
||||||
# tokens = [Token(TokenKind.IDENTIFIER, item.name, 0, 0, 0)]
|
|
||||||
# result.append(ConceptNode(item, index, index, tokens, item.name))
|
|
||||||
# index += 1
|
|
||||||
# source += item.name
|
|
||||||
# elif isinstance(item, PythonNode):
|
|
||||||
# tokens = list(Tokenizer(item.source))[:-1] # strip trailing EOF
|
|
||||||
# result.append(SourceCodeNode(item, index, index + len(tokens) - 1, tokens, item.source))
|
|
||||||
# index += len(tokens)
|
|
||||||
# source += item.source
|
|
||||||
# else:
|
|
||||||
# tokens = list(Tokenizer(item))[:-1] # strip trailing EOF
|
|
||||||
# result.append(UnrecognizedTokensNode(index, index + len(tokens) - 1, tokens))
|
|
||||||
# index += len(tokens)
|
|
||||||
# source += item
|
|
||||||
#
|
|
||||||
# return ReturnValueConcept(
|
|
||||||
# "who",
|
|
||||||
# False,
|
|
||||||
# ParserResultConcept(parser=multiple_concepts_parser, value=result, source=source))
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# class TestConceptsWithConceptsParser(TestUsingMemoryBasedSheerka):
|
|
||||||
#
|
|
||||||
# def init(self, concepts, inputs):
|
|
||||||
# context = self.get_context()
|
|
||||||
# for concept in concepts:
|
|
||||||
# context.sheerka.create_new_concept(context, concept)
|
|
||||||
#
|
|
||||||
# return context, ret_val(*inputs)
|
|
||||||
#
|
|
||||||
# def execute(self, concepts, inputs):
|
|
||||||
# context, input_return_values = self.init(concepts, inputs)
|
|
||||||
#
|
|
||||||
# parser = ConceptsWithConceptsParser()
|
|
||||||
# result = parser.parse(context, input_return_values.body)
|
|
||||||
#
|
|
||||||
# wrapper = result.body
|
|
||||||
# return_value = result.body.body
|
|
||||||
#
|
|
||||||
# return context, parser, result, wrapper, return_value
|
|
||||||
#
|
|
||||||
# @pytest.mark.parametrize("text, interested", [
|
|
||||||
# ("not parser result", False),
|
|
||||||
# (ParserResultConcept(parser="not multiple_concepts_parser"), False),
|
|
||||||
# (ParserResultConcept(parser=multiple_concepts_parser, value=[UnrecognizedTokensNode(0, 0, [])]), True),
|
|
||||||
# ])
|
|
||||||
# def test_not_interested(self, text, interested):
|
|
||||||
# context = self.get_context()
|
|
||||||
#
|
|
||||||
# res = ConceptsWithConceptsParser().parse(context, text)
|
|
||||||
# if interested:
|
|
||||||
# assert res is not None
|
|
||||||
# else:
|
|
||||||
# assert res is None
|
|
||||||
#
|
|
||||||
# def test_i_can_parse_composition_of_concepts(self):
|
|
||||||
# foo = Concept("foo")
|
|
||||||
# bar = Concept("bar")
|
|
||||||
# plus = Concept("a plus b").def_var("a").def_var("b")
|
|
||||||
#
|
|
||||||
# context, parser, result, wrapper, return_value = self.execute([foo, bar, plus], [foo, " plus ", bar])
|
|
||||||
#
|
|
||||||
# assert result.status
|
|
||||||
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
|
|
||||||
# assert result.who == wrapper.parser.name
|
|
||||||
# assert wrapper.source == "foo plus bar"
|
|
||||||
# assert context.sheerka.isinstance(return_value, plus)
|
|
||||||
#
|
|
||||||
# assert return_value.compiled["a"] == foo
|
|
||||||
# assert return_value.compiled["b"] == bar
|
|
||||||
#
|
|
||||||
# # sanity check, I can evaluate the result
|
|
||||||
# evaluated = context.sheerka.evaluate_concept(self.get_context(context.sheerka, True), return_value)
|
|
||||||
# assert evaluated.key == return_value.key
|
|
||||||
# assert evaluated.get_prop("a") == foo.init_key()
|
|
||||||
# assert evaluated.get_prop("b") == bar.init_key()
|
|
||||||
#
|
|
||||||
# def test_i_can_parse_when_composition_of_source_code(self):
|
|
||||||
# plus = Concept("a plus b", body="a + b").def_var("a").def_var("b")
|
|
||||||
# left = PythonNode("1+1", ast.parse("1+1", mode="eval"))
|
|
||||||
# right = PythonNode("2+2", ast.parse("2+2", mode="eval"))
|
|
||||||
# context, parser, result, wrapper, return_value = self.execute([plus], [left, " plus ", right])
|
|
||||||
#
|
|
||||||
# assert result.status
|
|
||||||
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
|
|
||||||
# assert result.who == wrapper.parser.name
|
|
||||||
# assert wrapper.source == "1+1 plus 2+2"
|
|
||||||
# assert context.sheerka.isinstance(return_value, plus)
|
|
||||||
#
|
|
||||||
# left_parser_result = ParserResultConcept(parser=parser, source="1+1", value=left)
|
|
||||||
# right_parser_result = ParserResultConcept(parser=parser, source="2+2", value=right)
|
|
||||||
# assert return_value.compiled["a"] == [ReturnValueConcept(parser.name, True, left_parser_result)]
|
|
||||||
# assert return_value.compiled["b"] == [ReturnValueConcept(parser.name, True, right_parser_result)]
|
|
||||||
#
|
|
||||||
# # sanity check, I can evaluate the result
|
|
||||||
# evaluated = context.sheerka.evaluate_concept(self.get_context(context.sheerka, True), return_value)
|
|
||||||
# assert evaluated.key == return_value.key
|
|
||||||
# assert evaluated.get_prop("a") == 2
|
|
||||||
# assert evaluated.get_prop("b") == 4
|
|
||||||
# assert evaluated.body == 6
|
|
||||||
#
|
|
||||||
# def test_i_can_parse_when_mix_of_concept_and_code(self):
|
|
||||||
# plus = Concept("a plus b").def_var("a").def_var("b")
|
|
||||||
# code = PythonNode("1+1", ast.parse("1+1", mode="eval"))
|
|
||||||
# foo = Concept("foo")
|
|
||||||
# context, parser, result, wrapper, return_value = self.execute([plus, foo], [foo, " plus ", code])
|
|
||||||
#
|
|
||||||
# assert result.status
|
|
||||||
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
|
|
||||||
# assert result.who == wrapper.parser.name
|
|
||||||
# assert wrapper.source == "foo plus 1+1"
|
|
||||||
# assert context.sheerka.isinstance(return_value, plus)
|
|
||||||
#
|
|
||||||
# code_parser_result = ParserResultConcept(parser=parser, source="1+1", value=code)
|
|
||||||
# assert return_value.compiled["a"] == foo
|
|
||||||
# assert return_value.compiled["b"] == [ReturnValueConcept(parser.name, True, code_parser_result)]
|
|
||||||
#
|
|
||||||
# # sanity check, I can evaluate the result
|
|
||||||
# evaluated = context.sheerka.evaluate_concept(self.get_context(context.sheerka, True), return_value)
|
|
||||||
# assert evaluated.key == return_value.key
|
|
||||||
# assert evaluated.get_prop("a") == foo.init_key()
|
|
||||||
# assert evaluated.get_prop("b") == 2
|
|
||||||
#
|
|
||||||
# def test_i_can_parse_when_multiple_concepts_are_recognized(self):
|
|
||||||
# foo = Concept("foo")
|
|
||||||
# bar = Concept("bar")
|
|
||||||
# plus_1 = Concept("a plus b", body="body1").def_var("a").def_var("b")
|
|
||||||
# plus_2 = Concept("a plus b", body="body2").def_var("a").def_var("b")
|
|
||||||
#
|
|
||||||
# context, input_return_values = self.init([foo, bar, plus_1, plus_2], [foo, " plus ", bar])
|
|
||||||
# parser = ConceptsWithConceptsParser()
|
|
||||||
# result = parser.parse(context, input_return_values.body)
|
|
||||||
#
|
|
||||||
# assert len(result) == 2
|
|
||||||
#
|
|
||||||
# res = result[0]
|
|
||||||
# wrapper = res.value
|
|
||||||
# return_value = res.value.value
|
|
||||||
# assert res.status
|
|
||||||
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
|
|
||||||
# assert res.who == wrapper.parser.name
|
|
||||||
# assert wrapper.source == "foo plus bar"
|
|
||||||
# assert context.sheerka.isinstance(return_value, plus_1)
|
|
||||||
# assert return_value.compiled["a"] == foo
|
|
||||||
# assert return_value.compiled["b"] == bar
|
|
||||||
#
|
|
||||||
# res = result[1]
|
|
||||||
# wrapper = res.value
|
|
||||||
# return_value = res.value.value
|
|
||||||
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
|
|
||||||
# assert res.who == wrapper.parser.name
|
|
||||||
# assert wrapper.source == "foo plus bar"
|
|
||||||
# assert context.sheerka.isinstance(return_value, plus_2)
|
|
||||||
# assert return_value.compiled["a"] == foo
|
|
||||||
# assert return_value.compiled["b"] == bar
|
|
||||||
#
|
|
||||||
# def test_i_cannot_parse_when_unknown_concept(self):
|
|
||||||
# foo = Concept("foo")
|
|
||||||
# bar = Concept("bar")
|
|
||||||
#
|
|
||||||
# context, input_return_values = self.init([foo, bar], [foo, " plus ", bar])
|
|
||||||
# parser = ConceptsWithConceptsParser()
|
|
||||||
# result = parser.parse(context, input_return_values.body)
|
|
||||||
# wrapper = result.body
|
|
||||||
# return_value = result.body.body
|
|
||||||
#
|
|
||||||
# assert not result.status
|
|
||||||
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.NOT_FOR_ME)
|
|
||||||
# assert result.who == parser.name
|
|
||||||
# assert return_value == input_return_values.body.body
|
|
||||||
@@ -1,216 +0,0 @@
|
|||||||
# import pytest
|
|
||||||
#
|
|
||||||
# from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
|
|
||||||
# from core.concept import Concept
|
|
||||||
# from core.tokenizer import Tokenizer, TokenKind, Token
|
|
||||||
# from parsers.BaseNodeParser import cnode, scnode, utnode, SourceCodeNode, ConceptNode
|
|
||||||
# from parsers.BnfNodeParser import BnfNodeParser, Sequence
|
|
||||||
# from parsers.MultipleConceptsParser import MultipleConceptsParser
|
|
||||||
# from parsers.PythonParser import PythonNode
|
|
||||||
#
|
|
||||||
# from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# def get_return_value(context, grammar, expression):
|
|
||||||
# parser = BnfNodeParser()
|
|
||||||
# parser.initialize(context, grammar)
|
|
||||||
#
|
|
||||||
# ret_val = parser.parse(context, expression)
|
|
||||||
# assert not ret_val.status
|
|
||||||
# return ret_val
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# class TestMultipleConceptsParser(TestUsingMemoryBasedSheerka):
|
|
||||||
#
|
|
||||||
# def init(self, concepts, grammar, expression):
|
|
||||||
# context = self.get_context()
|
|
||||||
# for c in concepts:
|
|
||||||
# context.sheerka.create_new_concept(context, c)
|
|
||||||
# return_value = get_return_value(context, grammar, expression)
|
|
||||||
#
|
|
||||||
# return context, return_value
|
|
||||||
#
|
|
||||||
# def test_not_interested_if_not_parser_result(self):
|
|
||||||
# context = self.get_context()
|
|
||||||
# text = "not parser result"
|
|
||||||
#
|
|
||||||
# res = MultipleConceptsParser().parse(context, text)
|
|
||||||
# assert res is None
|
|
||||||
#
|
|
||||||
# def test_not_interested_if_not_from_concept_lexer_parser(self):
|
|
||||||
# context = self.get_context()
|
|
||||||
# text = ParserResultConcept(parser="not concept lexer", value="some value")
|
|
||||||
#
|
|
||||||
# res = MultipleConceptsParser().parse(context, text)
|
|
||||||
# assert res is None
|
|
||||||
#
|
|
||||||
# def test_i_can_parse_exact_concepts(self):
|
|
||||||
# foo = Concept("foo", body="'foo'")
|
|
||||||
# bar = Concept("bar", body="'bar'")
|
|
||||||
# baz = Concept("baz", body="'baz'")
|
|
||||||
# grammar = {}
|
|
||||||
# context, return_value = self.init([foo, bar, baz], grammar, "bar foo baz")
|
|
||||||
#
|
|
||||||
# parser = MultipleConceptsParser()
|
|
||||||
# ret_val = parser.parse(context, return_value.body)
|
|
||||||
#
|
|
||||||
# assert ret_val.status
|
|
||||||
# assert ret_val.who == parser.name
|
|
||||||
# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
|
|
||||||
# assert ret_val.value.value == [
|
|
||||||
# ConceptNode(bar, 0, 0, source="bar"),
|
|
||||||
# ConceptNode(foo, 2, 2, source="foo"),
|
|
||||||
# ConceptNode(baz, 4, 4, source="baz")]
|
|
||||||
# assert ret_val.value.source == "bar foo baz"
|
|
||||||
#
|
|
||||||
# def test_i_can_parse_when_ending_with_bnf(self):
|
|
||||||
# foo = Concept("foo", body="'foo'")
|
|
||||||
# bar = Concept("bar", body="'bar'")
|
|
||||||
# grammar = {foo: Sequence("foo1", "foo2", "foo3")}
|
|
||||||
# context, return_value = self.init([foo, bar], grammar, "bar foo1 foo2 foo3")
|
|
||||||
#
|
|
||||||
# parser = MultipleConceptsParser()
|
|
||||||
# ret_val = parser.parse(context, return_value.body)
|
|
||||||
#
|
|
||||||
# assert ret_val.status
|
|
||||||
# assert ret_val.who == parser.name
|
|
||||||
# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
|
|
||||||
# assert ret_val.value.value == [cnode("bar", 0, 0, "bar"), cnode("foo", 2, 6, "foo1 foo2 foo3")]
|
|
||||||
# assert ret_val.value.source == "bar foo1 foo2 foo3"
|
|
||||||
#
|
|
||||||
# def test_i_can_parse_when_starting_with_bnf(self):
|
|
||||||
# foo = Concept("foo", body="'foo'")
|
|
||||||
# bar = Concept("bar", body="'bar'")
|
|
||||||
# grammar = {foo: Sequence("foo1", "foo2", "foo3")}
|
|
||||||
# context, return_value = self.init([foo, bar], grammar, "foo1 foo2 foo3 bar")
|
|
||||||
#
|
|
||||||
# parser = MultipleConceptsParser()
|
|
||||||
# ret_val = parser.parse(context, return_value.body)
|
|
||||||
#
|
|
||||||
# assert ret_val.status
|
|
||||||
# assert ret_val.who == parser.name
|
|
||||||
# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
|
|
||||||
# assert ret_val.value.value == [cnode("foo", 0, 4, "foo1 foo2 foo3"), cnode("bar", 6, 6, "bar")]
|
|
||||||
# assert ret_val.value.source == "foo1 foo2 foo3 bar"
|
|
||||||
#
|
|
||||||
# def test_i_can_parse_when_concept_are_mixed(self):
|
|
||||||
# foo = Concept("foo")
|
|
||||||
# bar = Concept("bar")
|
|
||||||
# baz = Concept("baz")
|
|
||||||
# grammar = {foo: Sequence("foo1", "foo2", "foo3")}
|
|
||||||
# context, return_value = self.init([foo, bar, baz], grammar, "baz foo1 foo2 foo3 bar")
|
|
||||||
#
|
|
||||||
# parser = MultipleConceptsParser()
|
|
||||||
# ret_val = parser.parse(context, return_value.body)
|
|
||||||
#
|
|
||||||
# assert ret_val.status
|
|
||||||
# assert ret_val.who == parser.name
|
|
||||||
# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
|
|
||||||
# assert ret_val.value.value == [
|
|
||||||
# cnode("baz", 0, 0, "baz"),
|
|
||||||
# cnode("foo", 2, 6, "foo1 foo2 foo3"),
|
|
||||||
# cnode("bar", 8, 8, "bar")]
|
|
||||||
# assert ret_val.value.source == "baz foo1 foo2 foo3 bar"
|
|
||||||
#
|
|
||||||
# def test_i_can_parse_when_multiple_concepts_are_matching(self):
|
|
||||||
# foo = Concept("foo")
|
|
||||||
# bar = Concept("bar", body="bar1")
|
|
||||||
# baz = Concept("bar", body="bar2")
|
|
||||||
# grammar = {foo: "foo"}
|
|
||||||
# context, return_value = self.init([foo, bar, baz], grammar, "foo bar")
|
|
||||||
#
|
|
||||||
# parser = MultipleConceptsParser()
|
|
||||||
# ret_val = parser.parse(context, return_value.body)
|
|
||||||
#
|
|
||||||
# assert len(ret_val) == 2
|
|
||||||
# assert ret_val[0].status
|
|
||||||
# assert ret_val[0].value.value == [cnode("foo", 0, 0, "foo"), cnode("bar", 2, 2, "bar")]
|
|
||||||
# assert ret_val[0].value.source == "foo bar"
|
|
||||||
# assert ret_val[0].value.value[1].concept.metadata.body == "bar1"
|
|
||||||
#
|
|
||||||
# assert ret_val[1].status
|
|
||||||
# assert ret_val[1].value.value == [cnode("foo", 0, 0, "foo"), cnode("bar", 2, 2, "bar")]
|
|
||||||
# assert ret_val[1].value.source == "foo bar"
|
|
||||||
# assert ret_val[1].value.value[1].concept.metadata.body == "bar2"
|
|
||||||
#
|
|
||||||
# def test_i_can_parse_when_source_code(self):
|
|
||||||
# foo = Concept("foo")
|
|
||||||
# grammar = {foo: "foo"}
|
|
||||||
# context, return_value = self.init([foo], grammar, "1 foo")
|
|
||||||
#
|
|
||||||
# parser = MultipleConceptsParser()
|
|
||||||
# ret_val = parser.parse(context, return_value.body)
|
|
||||||
# wrapper = ret_val.value
|
|
||||||
# value = ret_val.value.value
|
|
||||||
#
|
|
||||||
# assert ret_val.status
|
|
||||||
# assert ret_val.who == parser.name
|
|
||||||
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
|
|
||||||
# assert wrapper.source == "1 foo"
|
|
||||||
# assert value == [
|
|
||||||
# scnode(0, 1, "1 "),
|
|
||||||
# cnode("foo", 2, 2, "foo")]
|
|
||||||
#
|
|
||||||
# def test_i_cannot_parse_when_unrecognized_token(self):
|
|
||||||
# twenty_two = Concept("twenty two")
|
|
||||||
# one = Concept("one")
|
|
||||||
# grammar = {twenty_two: Sequence("twenty", "two")}
|
|
||||||
# context, return_value = self.init([twenty_two, one], grammar, "twenty two + one")
|
|
||||||
#
|
|
||||||
# parser = MultipleConceptsParser()
|
|
||||||
# ret_val = parser.parse(context, return_value.body)
|
|
||||||
#
|
|
||||||
# assert not ret_val.status
|
|
||||||
# assert ret_val.who == parser.name
|
|
||||||
# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
|
|
||||||
# assert ret_val.value.value == [
|
|
||||||
# cnode("twenty two", 0, 2, "twenty two"),
|
|
||||||
# utnode(3, 5, " + "),
|
|
||||||
# cnode("one", 6, 6, "one")
|
|
||||||
# ]
|
|
||||||
# assert ret_val.value.source == "twenty two + one"
|
|
||||||
#
|
|
||||||
# def test_i_cannot_parse_when_unknown_concepts(self):
|
|
||||||
# twenty_two = Concept("twenty two")
|
|
||||||
# one = Concept("one")
|
|
||||||
# grammar = {twenty_two: Sequence("twenty", "two")}
|
|
||||||
# context, return_value = self.init([twenty_two, one], grammar, "twenty two plus one")
|
|
||||||
#
|
|
||||||
# parser = MultipleConceptsParser()
|
|
||||||
# ret_val = parser.parse(context, return_value.body)
|
|
||||||
#
|
|
||||||
# assert not ret_val.status
|
|
||||||
# assert ret_val.who == parser.name
|
|
||||||
# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
|
|
||||||
# assert ret_val.value.value == [
|
|
||||||
# cnode("twenty two", 0, 2, "twenty two"),
|
|
||||||
# utnode(3, 5, " plus "),
|
|
||||||
# cnode("one", 6, 6, "one")
|
|
||||||
# ]
|
|
||||||
# assert ret_val.value.source == "twenty two plus one"
|
|
||||||
#
|
|
||||||
# @pytest.mark.parametrize("text, expected_source, expected_end", [
|
|
||||||
# ("True", "True", 0),
|
|
||||||
# ("1 == 1", "1 == 1", 4),
|
|
||||||
# ("1!xdf", "1", 0),
|
|
||||||
# ("1", "1", 0),
|
|
||||||
# ])
|
|
||||||
# def test_i_can_get_source_code_node(self, text, expected_source, expected_end):
|
|
||||||
# tokens = list(Tokenizer(text))[:-1] # strip trailing EOF
|
|
||||||
#
|
|
||||||
# start_index = 5 # a random number different of zero
|
|
||||||
# res = MultipleConceptsParser().get_source_code_node(self.get_context(), start_index, tokens)
|
|
||||||
#
|
|
||||||
# assert isinstance(res, SourceCodeNode)
|
|
||||||
# assert isinstance(res.node, PythonNode)
|
|
||||||
# assert res.source == expected_source
|
|
||||||
# assert res.start == start_index
|
|
||||||
# assert res.end == start_index + expected_end
|
|
||||||
#
|
|
||||||
# def test_i_cannot_parse_null_text(self):
|
|
||||||
# res = MultipleConceptsParser().get_source_code_node(self.get_context(), 0, [])
|
|
||||||
# assert res is None
|
|
||||||
#
|
|
||||||
# eof = Token(TokenKind.EOF, "", 0, 0, 0)
|
|
||||||
# res = MultipleConceptsParser().get_source_code_node(self.get_context(), 0, [eof])
|
|
||||||
# assert res is None
|
|
||||||
+113
-236
@@ -31,6 +31,7 @@ cmap = {
|
|||||||
"if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c"),
|
"if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c"),
|
||||||
"square": Concept("square(a)").def_var("a"),
|
"square": Concept("square(a)").def_var("a"),
|
||||||
"foo bar": Concept("foo bar(a)").def_var("a"),
|
"foo bar": Concept("foo bar(a)").def_var("a"),
|
||||||
|
"long infixed": Concept("a long infixed b").def_var("a").def_var("b"),
|
||||||
"twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
|
"twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -50,8 +51,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
TestSyaNodeParser.sheerka.force_sya_def(context, [
|
TestSyaNodeParser.sheerka.force_sya_def(context, [
|
||||||
(cmap["plus"].id, 5, SyaAssociativity.Right),
|
(cmap["plus"].id, 5, SyaAssociativity.Right),
|
||||||
(cmap["mult"].id, 10, SyaAssociativity.Right),
|
(cmap["mult"].id, 10, SyaAssociativity.Right),
|
||||||
(cmap["minus"].id, 10, SyaAssociativity.Right),
|
(cmap["minus"].id, 10, SyaAssociativity.Right)])
|
||||||
(cmap["square"].id, None, SyaAssociativity.No)])
|
|
||||||
|
|
||||||
def init_parser(self,
|
def init_parser(self,
|
||||||
my_concepts_map=None,
|
my_concepts_map=None,
|
||||||
@@ -161,8 +161,8 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
]),
|
]),
|
||||||
("twenty one plus twenty two",
|
("twenty one plus twenty two",
|
||||||
[
|
[
|
||||||
["twenty ", "one", " twenty ", "plus", "two"],
|
["twenty ", "one", ("twenty ", 1), "plus", "two"],
|
||||||
[cnode("twenties", 0, 2, "twenty one"), " twenty ", "plus", "two"],
|
[cnode("twenties", 0, 2, "twenty one"), ("twenty ", 1), "plus", "two"],
|
||||||
["twenty ", "one", cnode("twenties", 6, 8, "twenty two"), "plus"],
|
["twenty ", "one", cnode("twenties", 6, 8, "twenty two"), "plus"],
|
||||||
[cnode("twenties", 0, 2, "twenty one"), cnode("twenties", 6, 8, "twenty two"), "plus"],
|
[cnode("twenties", 0, 2, "twenty one"), cnode("twenties", 6, 8, "twenty two"), "plus"],
|
||||||
]),
|
]),
|
||||||
@@ -184,13 +184,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
("x$!# plus z$!#", [["x$!#", "z$!#", "plus"]]),
|
("x$!# plus z$!#", [["x$!#", "z$!#", "plus"]]),
|
||||||
])
|
])
|
||||||
def test_i_can_post_fix_simple_infix_concepts(self, expression, expected_sequences):
|
def test_i_can_post_fix_simple_infix_concepts(self, expression, expected_sequences):
|
||||||
# concepts_map = {
|
|
||||||
# "plus": Concept("a plus b").def_var("a").def_var("b"),
|
|
||||||
# "one": Concept("one"),
|
|
||||||
# "two": Concept("two"),
|
|
||||||
# "three": Concept("three"),
|
|
||||||
# "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
|
|
||||||
# }
|
|
||||||
sheerka, context, parser = self.init_parser()
|
sheerka, context, parser = self.init_parser()
|
||||||
|
|
||||||
res = parser.infix_to_postfix(context, expression)
|
res = parser.infix_to_postfix(context, expression)
|
||||||
@@ -241,12 +234,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
("x$!# prefixed", [["x$!#", "prefixed"]]),
|
("x$!# prefixed", [["x$!#", "prefixed"]]),
|
||||||
])
|
])
|
||||||
def test_i_can_post_fix_simple_prefixed_concepts(self, expression, expected_sequences):
|
def test_i_can_post_fix_simple_prefixed_concepts(self, expression, expected_sequences):
|
||||||
# concepts_map = {
|
|
||||||
# "prefixed": Concept("a prefixed").def_var("a"),
|
|
||||||
# "one": Concept("one"),
|
|
||||||
# "two": Concept("two"),
|
|
||||||
# "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
|
|
||||||
# }
|
|
||||||
sheerka, context, parser = self.init_parser()
|
sheerka, context, parser = self.init_parser()
|
||||||
|
|
||||||
res = parser.infix_to_postfix(context, expression)
|
res = parser.infix_to_postfix(context, expression)
|
||||||
@@ -314,12 +301,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
("suffixed x$!#", [["x$!#", "suffixed"]]),
|
("suffixed x$!#", [["x$!#", "suffixed"]]),
|
||||||
])
|
])
|
||||||
def test_i_can_post_fix_simple_suffixed_concepts(self, expression, expected_sequences):
|
def test_i_can_post_fix_simple_suffixed_concepts(self, expression, expected_sequences):
|
||||||
# concepts_map = {
|
|
||||||
# "suffixed": Concept("suffixed a").def_var("a"),
|
|
||||||
# "one": Concept("one"),
|
|
||||||
# "two": Concept("two"),
|
|
||||||
# "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
|
|
||||||
# }
|
|
||||||
sheerka, context, parser = self.init_parser()
|
sheerka, context, parser = self.init_parser()
|
||||||
|
|
||||||
res = parser.infix_to_postfix(context, expression)
|
res = parser.infix_to_postfix(context, expression)
|
||||||
@@ -351,8 +332,9 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
|
|
||||||
@pytest.mark.parametrize("expression, expected_sequences", [
|
@pytest.mark.parametrize("expression, expected_sequences", [
|
||||||
("one ? two : three", [["one", "two", "three", "?"]]),
|
("one ? two : three", [["one", "two", "three", "?"]]),
|
||||||
|
("one ? baz qux : two", [["one", "baz qux", "two", "?"]]),
|
||||||
("1+1 ? one + two : twenty one", [
|
("1+1 ? one + two : twenty one", [
|
||||||
["1+1 ", "one", " + ", "two"], # an error is detected
|
["1+1", "one", " + ", "two"], # error is detected so the parsing has stopped
|
||||||
["1+1", "one + two", "twenty ", "?", ("one", 1)],
|
["1+1", "one + two", "twenty ", "?", ("one", 1)],
|
||||||
["1+1", "one + two", short_cnode("twenties", "twenty one"), "?"],
|
["1+1", "one + two", short_cnode("twenties", "twenty one"), "?"],
|
||||||
]),
|
]),
|
||||||
@@ -364,13 +346,13 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
["1+1", "x$!#", short_cnode("twenties", "twenty one"), "if"],
|
["1+1", "x$!#", short_cnode("twenties", "twenty one"), "if"],
|
||||||
]),
|
]),
|
||||||
("if x$!# then one + two else z$!# end", [
|
("if x$!# then one + two else z$!# end", [
|
||||||
[" x$!# ", "one", " + ", "two"], # an error is detected
|
["x$!#", "one", " + ", "two"], # error is detected so the parsing has stopped
|
||||||
["x$!#", "one + two", "z$!#", "if"],
|
["x$!#", "one + two", "z$!#", "if"],
|
||||||
]),
|
]),
|
||||||
])
|
])
|
||||||
def test_i_can_post_fix_ternary_concepts(self, expression, expected_sequences):
|
def test_i_can_post_fix_ternary_concepts(self, expression, expected_sequences):
|
||||||
"""
|
"""
|
||||||
The purpose of this test is to validate concepts like
|
The purpose of this test is to validate concepts
|
||||||
that have at least 3 parameters separated by tokens
|
that have at least 3 parameters separated by tokens
|
||||||
Example :
|
Example :
|
||||||
var_0 token var_1 token var_2
|
var_0 token var_1 token var_2
|
||||||
@@ -381,14 +363,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# concepts_map = {
|
|
||||||
# "?": Concept("a ? b : c").def_var("a").def_var("b").def_var("c"),
|
|
||||||
# "if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c"),
|
|
||||||
# "one": Concept("one"),
|
|
||||||
# "two": Concept("two"),
|
|
||||||
# "three": Concept("three"),
|
|
||||||
# "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
|
|
||||||
# }
|
|
||||||
sheerka, context, parser = self.init_parser()
|
sheerka, context, parser = self.init_parser()
|
||||||
|
|
||||||
res = parser.infix_to_postfix(context, expression)
|
res = parser.infix_to_postfix(context, expression)
|
||||||
@@ -480,17 +454,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
("one mult (two plus three)", ["one", "two", "three", "plus", "mult"]),
|
("one mult (two plus three)", ["one", "two", "three", "plus", "mult"]),
|
||||||
])
|
])
|
||||||
def test_i_can_post_fix_binary_with_precedence(self, expression, expected):
|
def test_i_can_post_fix_binary_with_precedence(self, expression, expected):
|
||||||
# concepts_map = {
|
|
||||||
# "plus": Concept("a plus b").def_var("a").def_var("b"),
|
|
||||||
# "mult": Concept("a mult b").def_var("a").def_var("b"),
|
|
||||||
# "one": Concept("one"),
|
|
||||||
# "two": Concept("two"),
|
|
||||||
# "three": Concept("three"),
|
|
||||||
# }
|
|
||||||
# sya_def = {
|
|
||||||
# concepts_map["plus"]: (5, SyaAssociativity.Right),
|
|
||||||
# concepts_map["mult"]: (10, SyaAssociativity.Right), # precedence greater than plus
|
|
||||||
# }
|
|
||||||
sheerka, context, parser = self.init_parser()
|
sheerka, context, parser = self.init_parser()
|
||||||
|
|
||||||
res = parser.infix_to_postfix(context, expression)
|
res = parser.infix_to_postfix(context, expression)
|
||||||
@@ -566,7 +529,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
}
|
}
|
||||||
|
|
||||||
sya_def = {
|
sya_def = {
|
||||||
concepts_map["plus"]: (None, SyaAssociativity.Left),
|
concepts_map["plus"]: (1, SyaAssociativity.Left),
|
||||||
}
|
}
|
||||||
sheerka, context, parser = self.init_parser(concepts_map, sya_def)
|
sheerka, context, parser = self.init_parser(concepts_map, sya_def)
|
||||||
|
|
||||||
@@ -669,9 +632,9 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
("square( one plus three ) minus two", ["one", "three", "plus", "square", "two", "minus"]),
|
("square( one plus three ) minus two", ["one", "three", "plus", "square", "two", "minus"]),
|
||||||
("one minus square( two plus three ) ", ["one", "two", "three", "plus", "square", "minus"]),
|
("one minus square( two plus three ) ", ["one", "two", "three", "plus", "square", "minus"]),
|
||||||
|
|
||||||
("(one prefixed) prefixed)", ["one", "prefixed", ("prefixed", 1)]),
|
("((one prefixed) prefixed)", ["one", "prefixed", ("prefixed", 1)]),
|
||||||
("( one prefixed ) prefixed)", ["one", "prefixed", ("prefixed", 1)]),
|
("( ( one prefixed ) prefixed)", ["one", "prefixed", ("prefixed", 1)]),
|
||||||
("( square( one ) prefixed ) prefixed)", ["one", "square", "prefixed", ("prefixed", 1)]),
|
("( ( square( one ) prefixed ) prefixed)", ["one", "square", "prefixed", ("prefixed", 1)]),
|
||||||
|
|
||||||
("suffixed (suffixed one)", ["one", ("suffixed", 1), "suffixed"]),
|
("suffixed (suffixed one)", ["one", ("suffixed", 1), "suffixed"]),
|
||||||
("suffixed ( suffixed one) ", ["one", ("suffixed", 1), "suffixed"]),
|
("suffixed ( suffixed one) ", ["one", ("suffixed", 1), "suffixed"]),
|
||||||
@@ -681,32 +644,12 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
("one plus (two minus three)", ["one", "two", "three", "minus", "plus"]),
|
("one plus (two minus three)", ["one", "two", "three", "minus", "plus"]),
|
||||||
("one plus ( two minus three )", ["one", "two", "three", "minus", "plus"]),
|
("one plus ( two minus three )", ["one", "two", "three", "minus", "plus"]),
|
||||||
("(one plus two) minus three", ["one", "two", "plus", "three", "minus"]),
|
("(one plus two) minus three", ["one", "two", "plus", "three", "minus"]),
|
||||||
("( one plus two ) minus three )", ["one", "two", "plus", "three", "minus"]),
|
("(( one plus two ) minus three )", ["one", "two", "plus", "three", "minus"]),
|
||||||
|
|
||||||
("foo bar(one)", ["one", "foo bar"]),
|
("foo bar(one)", ["one", "foo bar"]),
|
||||||
("foo bar ( one )", ["one", "foo bar"]),
|
("foo bar ( one )", ["one", "foo bar"]),
|
||||||
])
|
])
|
||||||
def test_i_can_pos_fix_when_parenthesis(self, expression, expected):
|
def test_i_can_pos_fix_when_parenthesis(self, expression, expected):
|
||||||
# concepts_map = {
|
|
||||||
# "prefixed": Concept("a prefixed").def_var("a"),
|
|
||||||
# "suffixed": Concept("suffixed a").def_var("a"),
|
|
||||||
# "square": Concept("square(a)").def_var("a"),
|
|
||||||
# "foo bar": Concept("foo bar(a)").def_var("a"),
|
|
||||||
# "plus": Concept("a plus b").def_var("a").def_var("b"),
|
|
||||||
# "minus": Concept("a minus b").def_var("a").def_var("b"),
|
|
||||||
# "if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c"),
|
|
||||||
# "?": Concept("a ? b : c").def_var("a").def_var("b").def_var("c"),
|
|
||||||
# "one": Concept("one"),
|
|
||||||
# "two": Concept("two"),
|
|
||||||
# "three": Concept("three"),
|
|
||||||
# }
|
|
||||||
#
|
|
||||||
# sya_def = {
|
|
||||||
# concepts_map["square"]: (None, SyaAssociativity.No),
|
|
||||||
# concepts_map["plus"]: (10, SyaAssociativity.Right),
|
|
||||||
# concepts_map["minus"]: (10, SyaAssociativity.Right),
|
|
||||||
# }
|
|
||||||
|
|
||||||
sheerka, context, parser = self.init_parser()
|
sheerka, context, parser = self.init_parser()
|
||||||
|
|
||||||
res = parser.infix_to_postfix(context, expression)
|
res = parser.infix_to_postfix(context, expression)
|
||||||
@@ -762,21 +705,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
("suffixed one function(two)", [["one", "suffixed", SCWC(" function(", ")", "two")]]),
|
("suffixed one function(two)", [["one", "suffixed", SCWC(" function(", ")", "two")]]),
|
||||||
])
|
])
|
||||||
def test_i_can_post_fix_when_parenthesis_and_unknown(self, expression, expected_sequences):
|
def test_i_can_post_fix_when_parenthesis_and_unknown(self, expression, expected_sequences):
|
||||||
# concepts_map = {
|
|
||||||
# "prefixed": Concept("a prefixed").def_var("a"),
|
|
||||||
# "suffixed": Concept("suffixed a").def_var("a"),
|
|
||||||
# "plus": Concept("a plus b").def_var("a").def_var("b"),
|
|
||||||
# "mult": Concept("a mult b").def_var("a").def_var("b"),
|
|
||||||
# "if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c"),
|
|
||||||
# "one": Concept("one"),
|
|
||||||
# "two": Concept("two"),
|
|
||||||
# "three": Concept("three"),
|
|
||||||
# "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
|
|
||||||
# }
|
|
||||||
# sya_def = {
|
|
||||||
# concepts_map["plus"]: (5, SyaAssociativity.Right),
|
|
||||||
# concepts_map["mult"]: (10, SyaAssociativity.Right), # precedence greater than plus
|
|
||||||
# }
|
|
||||||
sheerka, context, parser = self.init_parser()
|
sheerka, context, parser = self.init_parser()
|
||||||
|
|
||||||
res = parser.infix_to_postfix(context, expression)
|
res = parser.infix_to_postfix(context, expression)
|
||||||
@@ -787,28 +715,22 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
assert res_i.out == expected_array
|
assert res_i.out == expected_array
|
||||||
|
|
||||||
@pytest.mark.parametrize("expression, expected", [
|
@pytest.mark.parametrize("expression, expected", [
|
||||||
("(", ("(", 0)),
|
# ("(", ("(", 0)),
|
||||||
("one plus ( 1 + ", ("(", 4)),
|
# ("one plus ( 1 + ", ("(", 4)),
|
||||||
("one( 1 + ", ("(", 1)),
|
# ("one( 1 + ", ("(", 1)),
|
||||||
("one ( 1 + ", ("(", 2)),
|
# ("one ( 1 + ", ("(", 2)),
|
||||||
("function( 1 + ", ("(", 1)),
|
# ("function( 1 + ", ("(", 1)),
|
||||||
("function ( 1 + ", ("(", 2)),
|
# ("function ( 1 + ", ("(", 2)),
|
||||||
("one plus ) 1 + ", (")", 4)),
|
# ("one plus ) 1 + ", (")", 4)),
|
||||||
("one ) 1 + ", (")", 2)),
|
# ("one ) 1 + ", (")", 2)),
|
||||||
("function ) 1 + ", (")", 2)),
|
# ("function ) 1 + ", (")", 2)),
|
||||||
("one ? ( : two", ("(", 4)),
|
# ("one ? ( : two", ("(", 4)),
|
||||||
("one ? one plus ( : two", ("(", 8)),
|
# ("one ? one plus ( : two", ("(", 8)),
|
||||||
("one ? ) : two", (")", 4)),
|
# ("one ? ) : two", (")", 4)),
|
||||||
("one ? one plus ) : two", (")", 8)),
|
# ("one ? one plus ) : two", (")", 8)),
|
||||||
("(one plus ( 1 + )", ("(", 0)),
|
("(one plus ( 1 + )", ("(", 0)),
|
||||||
])
|
])
|
||||||
def test_i_can_detect_parenthesis_mismatch_error_when_post_fixing(self, expression, expected):
|
def test_i_can_detect_parenthesis_mismatch_error_when_post_fixing(self, expression, expected):
|
||||||
# concepts_map = {
|
|
||||||
# "one": Concept("one"),
|
|
||||||
# "two": Concept("two"),
|
|
||||||
# "plus": Concept("a plus b").def_var("a").def_var("b"),
|
|
||||||
# "?": Concept("a ? b : c").def_var("a").def_var("b").def_var("c"),
|
|
||||||
# }
|
|
||||||
sheerka, context, parser = self.init_parser()
|
sheerka, context, parser = self.init_parser()
|
||||||
|
|
||||||
res = parser.infix_to_postfix(context, expression)
|
res = parser.infix_to_postfix(context, expression)
|
||||||
@@ -820,12 +742,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
("one ? one two : three", ("?", ":")),
|
("one ? one two : three", ("?", ":")),
|
||||||
])
|
])
|
||||||
def test_i_can_detected_when_too_many_parameters(self, expression, expected):
|
def test_i_can_detected_when_too_many_parameters(self, expression, expected):
|
||||||
# concepts_map = {
|
|
||||||
# "one": Concept("one"),
|
|
||||||
# "two": Concept("two"),
|
|
||||||
# "plus": Concept("a plus b").def_var("a").def_var("b"),
|
|
||||||
# "?": Concept("a ? b : c").def_var("a").def_var("b").def_var("c"),
|
|
||||||
# }
|
|
||||||
sheerka, context, parser = self.init_parser(cmap, None)
|
sheerka, context, parser = self.init_parser(cmap, None)
|
||||||
|
|
||||||
res = parser.infix_to_postfix(context, expression)
|
res = parser.infix_to_postfix(context, expression)
|
||||||
@@ -860,17 +776,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
("(one infix two) (three prefixed)", ["one", "two", "infix", "three", "prefixed"]),
|
("(one infix two) (three prefixed)", ["one", "two", "infix", "three", "prefixed"]),
|
||||||
])
|
])
|
||||||
def test_i_can_post_fix_sequences(self, expression, expected):
|
def test_i_can_post_fix_sequences(self, expression, expected):
|
||||||
# concepts_map = {
|
|
||||||
# "prefixed": Concept("a prefixed").def_var("a"),
|
|
||||||
# "suffixed": Concept("suffixed a").def_var("a"),
|
|
||||||
# "infix": Concept("a infix b").def_var("a").def_var("b"),
|
|
||||||
# "?": Concept("a ? b : c").def_var("a").def_var("b").def_var("c"),
|
|
||||||
# "one": Concept("one"),
|
|
||||||
# "two": Concept("two"),
|
|
||||||
# "three": Concept("three"),
|
|
||||||
# "four": Concept("four"),
|
|
||||||
# }
|
|
||||||
|
|
||||||
sheerka, context, parser = self.init_parser(cmap, None)
|
sheerka, context, parser = self.init_parser(cmap, None)
|
||||||
|
|
||||||
res = parser.infix_to_postfix(context, expression)
|
res = parser.infix_to_postfix(context, expression)
|
||||||
@@ -886,23 +791,49 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
"plus equals": Concept("a plus equals b").def_var("a").def_var("b"),
|
"plus equals": Concept("a plus equals b").def_var("a").def_var("b"),
|
||||||
}
|
}
|
||||||
|
|
||||||
sheerka, context, parser = self.init_parser(concepts_map, None)
|
sya_def = {
|
||||||
|
concepts_map["plus"]: (1, SyaAssociativity.Right),
|
||||||
|
concepts_map["plus plus"]: (1, SyaAssociativity.Right),
|
||||||
|
concepts_map["plus equals"]: (1, SyaAssociativity.Right),
|
||||||
|
}
|
||||||
|
|
||||||
|
sheerka, context, parser = self.init_parser(concepts_map, sya_def)
|
||||||
|
|
||||||
expression = "a plus plus equals b"
|
expression = "a plus plus equals b"
|
||||||
res = parser.infix_to_postfix(context, expression)
|
res = parser.infix_to_postfix(context, expression)
|
||||||
expected_array = tests.parsers.parsers_utils.compute_debug_array(res)
|
expected_array = tests.parsers.parsers_utils.compute_debug_array(res)
|
||||||
assert expected_array == [
|
assert expected_array == [
|
||||||
["a", "a plus b", "a plus b", "equals", "b"],
|
["T(a)", "C(a plus b)", "C(a plus b)", "T(equals)", "T(b)"],
|
||||||
["a", "a plus b", "a plus plus", "equals", "b"],
|
["T(a)", "C(a plus b)", "C(a plus plus)", "T(equals)", "T(b)"],
|
||||||
["a", "a plus b", "a plus equals b", "equals", "b"],
|
["T(a)", "C(a plus b)", "C(a plus equals b)", "T(equals)", "T(b)"],
|
||||||
["a", "a plus plus", "plus", "equals", "b"],
|
["T(a)", "C(a plus plus)", "T(plus)", "T(equals)", "T(b)"],
|
||||||
["a", "a plus plus", "plus", "equals", "b"],
|
["T(a)", "C(a plus plus)", "T(plus)", "T(equals)", "T(b)"],
|
||||||
["a", "a plus plus", "plus", "equals", "b"],
|
["T(a)", "C(a plus plus)", "T(plus)", "T(equals)", "T(b)"],
|
||||||
["a", "a plus equals b", "a plus b", "equals", "b"],
|
["T(a)", "C(a plus equals b)", "C(a plus b)", "T(equals)", "T(b)"],
|
||||||
["a", "a plus equals b", "a plus plus", "equals", "b"],
|
["T(a)", "C(a plus equals b)", "C(a plus plus)", "T(equals)", "T(b)"],
|
||||||
["a", "a plus equals b", "a plus equals b", "equals", "b"],
|
["T(a)", "C(a plus equals b)", "C(a plus equals b)", "T(equals)", "T(b)"],
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def test_non_reg(self):
|
||||||
|
concepts_map = {
|
||||||
|
"plus": Concept("a plus b").def_var("a").def_var("b"),
|
||||||
|
"complex infix": Concept("a complex infix b ").def_var("a").def_var("b"),
|
||||||
|
}
|
||||||
|
|
||||||
|
sya_def = {
|
||||||
|
# concepts_map["plus"]: (1, SyaAssociativity.Right),
|
||||||
|
# concepts_map["plus plus"]: (1, SyaAssociativity.Right),
|
||||||
|
# concepts_map["plus equals"]: (1, SyaAssociativity.Right),
|
||||||
|
}
|
||||||
|
|
||||||
|
sheerka, context, parser = self.init_parser(concepts_map, sya_def)
|
||||||
|
|
||||||
|
expression = "a plus complex infix b"
|
||||||
|
res = parser.infix_to_postfix(context, expression)
|
||||||
|
|
||||||
|
res = parser.parse(context, expression)
|
||||||
|
pass
|
||||||
|
|
||||||
def test_i_can_use_string_instead_of_identifier(self):
|
def test_i_can_use_string_instead_of_identifier(self):
|
||||||
concepts_map = {
|
concepts_map = {
|
||||||
"ternary": Concept("a ? ? b '::' c").def_var("a").def_var("b").def_var("c"),
|
"ternary": Concept("a ? ? b '::' c").def_var("a").def_var("b").def_var("c"),
|
||||||
@@ -945,13 +876,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
Not quite sure why this test is here
|
Not quite sure why this test is here
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
# concepts_map = {
|
sheerka, context, parser = self.init_parser()
|
||||||
# "foo": Concept("foo a").def_var("a"),
|
|
||||||
# "one": Concept("one"),
|
|
||||||
# "two": Concept("two"),
|
|
||||||
# "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
|
|
||||||
# }
|
|
||||||
sheerka, context, parser = self.init_parser(cmap, None)
|
|
||||||
|
|
||||||
expression = "suffixed twenties"
|
expression = "suffixed twenties"
|
||||||
res = parser.infix_to_postfix(context, expression)
|
res = parser.infix_to_postfix(context, expression)
|
||||||
@@ -962,17 +887,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
assert res[0].out == expected_array
|
assert res[0].out == expected_array
|
||||||
|
|
||||||
def test_i_can_parse_when_concept_atom_only(self):
|
def test_i_can_parse_when_concept_atom_only(self):
|
||||||
# concepts_map = {
|
|
||||||
# "plus": Concept("a plus b").def_var("a").def_var("b"),
|
|
||||||
# "mult": Concept("a mult b").def_var("a").def_var("b"),
|
|
||||||
# "one": Concept("one"),
|
|
||||||
# "two": Concept("two"),
|
|
||||||
# "three": Concept("three"),
|
|
||||||
# }
|
|
||||||
# sya_def = {
|
|
||||||
# concepts_map["plus"]: (5, SyaAssociativity.Right),
|
|
||||||
# concepts_map["mult"]: (10, SyaAssociativity.Right), # precedence greater than plus
|
|
||||||
# }
|
|
||||||
sheerka, context, parser = self.init_parser()
|
sheerka, context, parser = self.init_parser()
|
||||||
|
|
||||||
text = "one plus two mult three"
|
text = "one plus two mult three"
|
||||||
@@ -992,10 +906,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
assert expected_concept.compiled["b"].compiled["b"] == cmap["three"]
|
assert expected_concept.compiled["b"].compiled["b"] == cmap["three"]
|
||||||
|
|
||||||
def test_i_can_parse_when_python_code(self):
|
def test_i_can_parse_when_python_code(self):
|
||||||
# concepts_map = {
|
sheerka, context, parser = self.init_parser()
|
||||||
# "foo": Concept("foo a").def_var("a")
|
|
||||||
# }
|
|
||||||
sheerka, context, parser = self.init_parser(cmap, None)
|
|
||||||
|
|
||||||
text = "suffixed 1 + 1"
|
text = "suffixed 1 + 1"
|
||||||
res = parser.parse(context, text)
|
res = parser.parse(context, text)
|
||||||
@@ -1018,12 +929,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
assert isinstance(return_value_a.body.body, PythonNode)
|
assert isinstance(return_value_a.body.body, PythonNode)
|
||||||
|
|
||||||
def test_i_can_parse_when_bnf_concept(self):
|
def test_i_can_parse_when_bnf_concept(self):
|
||||||
# concepts_map = {
|
|
||||||
# "foo": Concept("foo a").def_var("a"),
|
|
||||||
# "one": Concept("one"),
|
|
||||||
# "two": Concept("two"),
|
|
||||||
# "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
|
|
||||||
# }
|
|
||||||
sheerka, context, parser = self.init_parser()
|
sheerka, context, parser = self.init_parser()
|
||||||
|
|
||||||
text = "suffixed twenty one"
|
text = "suffixed twenty one"
|
||||||
@@ -1043,13 +948,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
assert expected_concept.compiled["a"].compiled["unit"] == cmap["one"]
|
assert expected_concept.compiled["a"].compiled["unit"] == cmap["one"]
|
||||||
|
|
||||||
def test_i_can_parse_sequences(self):
|
def test_i_can_parse_sequences(self):
|
||||||
# concepts_map = {
|
sheerka, context, parser = self.init_parser()
|
||||||
# "plus": Concept("a plus b").def_var("a").def_var("b"),
|
|
||||||
# "foo": Concept("foo a").def_var("a"),
|
|
||||||
# "one": Concept("one"),
|
|
||||||
# "two": Concept("two"),
|
|
||||||
# }
|
|
||||||
sheerka, context, parser = self.init_parser(cmap, None)
|
|
||||||
|
|
||||||
text = "one plus 1 + 1 suffixed two"
|
text = "one plus 1 + 1 suffixed two"
|
||||||
res = parser.parse(context, text)
|
res = parser.parse(context, text)
|
||||||
@@ -1087,21 +986,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
SCWC("function(", ")", CNC("suffixed", 2, 7, a="x$!#"))]),
|
SCWC("function(", ")", CNC("suffixed", 2, 7, a="x$!#"))]),
|
||||||
])
|
])
|
||||||
def test_i_can_parse_when_one_result(self, text, expected_status, expected_result):
|
def test_i_can_parse_when_one_result(self, text, expected_status, expected_result):
|
||||||
# concepts_map = {
|
|
||||||
# "prefixed": Concept("a prefixed").def_var("a"),
|
|
||||||
# "suffixed": Concept("suffixed a").def_var("a"),
|
|
||||||
# "mult": Concept("a mult b").def_var("a").def_var("b"),
|
|
||||||
# "plus": Concept("a plus b").def_var("a").def_var("b"),
|
|
||||||
# "if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c"),
|
|
||||||
# "one": Concept("one"),
|
|
||||||
# "two": Concept("two"),
|
|
||||||
# "three": Concept("three"),
|
|
||||||
# "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
|
|
||||||
# }
|
|
||||||
# sya_def = {
|
|
||||||
# concepts_map["plus"]: (5, SyaAssociativity.Right),
|
|
||||||
# concepts_map["mult"]: (10, SyaAssociativity.Right), # precedence greater than plus
|
|
||||||
# }
|
|
||||||
sheerka, context, parser = self.init_parser()
|
sheerka, context, parser = self.init_parser()
|
||||||
|
|
||||||
res = parser.parse(context, text)
|
res = parser.parse(context, text)
|
||||||
@@ -1113,41 +997,54 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
|
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
|
||||||
assert lexer_nodes == expected_array
|
assert lexer_nodes == expected_array
|
||||||
|
|
||||||
# @pytest.mark.parametrize("text, list_of_expected", [
|
@pytest.mark.parametrize("text", [
|
||||||
# ("1 plus twenty one", [
|
"foo bar (one",
|
||||||
# (False, [CNC("plus", a=scnode(0, 0, "1"), b=UTN(" twenty ")), CN("one")]),
|
"foo bar one",
|
||||||
# (True, [CNC("plus", a=scnode(0, 0, "1"), b=CN("twenties", source="twenty one"))])
|
"foo one two",
|
||||||
# ])
|
"foo x$!# one",
|
||||||
# ])
|
])
|
||||||
# def test_i_can_parse_when_multiple_results(self, text, list_of_expected):
|
def test_i_cannot_parse_when_concept_almost_found(self, text):
|
||||||
# concepts_map = {
|
"""
|
||||||
# "prefixed": Concept("a prefixed").def_var("a"),
|
We test that the parsed concept seems like a known one, but it was not.
|
||||||
# "suffixed": Concept("suffixed a").def_var("a"),
|
The parser has to detected that the predication was incorrect
|
||||||
# "mult": Concept("a mult b").def_var("a").def_var("b"),
|
:return:
|
||||||
# "plus": Concept("a plus b").def_var("a").def_var("b"),
|
"""
|
||||||
# "if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c"),
|
sheerka, context, parser = self.init_parser()
|
||||||
# "one": Concept("one"),
|
|
||||||
# "two": Concept("two"),
|
res = parser.parse(context, text)
|
||||||
# "three": Concept("three"),
|
|
||||||
# "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
|
assert not res.status
|
||||||
# }
|
assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
|
||||||
# sya_def = {
|
assert res.body.body == text
|
||||||
# concepts_map["plus"]: (5, SyaAssociativity.Right),
|
|
||||||
# concepts_map["mult"]: (10, SyaAssociativity.Right), # precedence greater than plus
|
@pytest.mark.parametrize("text, expected_result", [
|
||||||
# }
|
("one plus two foo bar baz", [CNC("plus", a="one", b="two"), UTN(" foo bar baz")]),
|
||||||
# sheerka, context, parser = self.init_parser(concepts_map, sya_def)
|
("one plus two foo bar", [CNC("plus", a="one", b="two"), UTN(" foo bar")]),
|
||||||
#
|
("foo bar one plus two", [UTN("foo bar "), CNC("plus", a="one", b="two")]),
|
||||||
# list_of_res = parser.parse(context, text)
|
("foo bar (one plus two", [UTN("foo bar ("), CNC("plus", a="one", b="two")]),
|
||||||
# assert len(list_of_res) == len(list_of_expected)
|
("one plus two a long other b", [CNC("plus", a="one", b="two"), UTN(" a long other b")]),
|
||||||
#
|
("one plus two a long infixed", [CNC("plus", a="one", b="two"), UTN(" a long infixed")]),
|
||||||
# for res, expected in zip(list_of_res, list_of_expected):
|
("one plus two a long", [CNC("plus", a="one", b="two"), UTN(" a long")]),
|
||||||
# wrapper = res.body
|
("one ? a long infixed : two", [CNC("?", a="one", b=UTN("a long infixed"), c="two")]),
|
||||||
# lexer_nodes = res.body.body
|
("one ? a long infix : two", [CNC("?", a="one", b=UTN("a long infix"), c="two")]),
|
||||||
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
|
])
|
||||||
#
|
def test_i_cannot_parse_when_one_part_is_recognized_but_not_the_rest(self, text, expected_result):
|
||||||
# expected_array = compute_expected_array(concepts_map, text, expected[1])
|
"""
|
||||||
# assert res.status == expected[0]
|
We test that the parsed concept seems like a known one, but it was not.
|
||||||
# assert lexer_nodes == expected_array
|
The parser has to detected that the predication was incorrect
|
||||||
|
:return:
|
||||||
|
"""
|
||||||
|
sheerka, context, parser = self.init_parser()
|
||||||
|
|
||||||
|
res = parser.parse(context, text)
|
||||||
|
|
||||||
|
wrapper = res.body
|
||||||
|
lexer_nodes = res.body.body
|
||||||
|
|
||||||
|
expected_array = compute_expected_array(cmap, text, expected_result)
|
||||||
|
assert not res.status
|
||||||
|
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
|
||||||
|
assert lexer_nodes == expected_array
|
||||||
|
|
||||||
@pytest.mark.parametrize("text, expected_concept, expected_unrecognized", [
|
@pytest.mark.parametrize("text, expected_concept, expected_unrecognized", [
|
||||||
("x$!# prefixed", "prefixed", ["a"]),
|
("x$!# prefixed", "prefixed", ["a"]),
|
||||||
@@ -1157,12 +1054,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
("x$!# infix z$!#", "infix", ["a", "b"]),
|
("x$!# infix z$!#", "infix", ["a", "b"]),
|
||||||
])
|
])
|
||||||
def test_i_cannot_parse_when_unrecognized(self, text, expected_concept, expected_unrecognized):
|
def test_i_cannot_parse_when_unrecognized(self, text, expected_concept, expected_unrecognized):
|
||||||
# concepts_map = {
|
|
||||||
# "suffixed": Concept("suffixed a").def_var("a"),
|
|
||||||
# "prefixed": Concept("a prefixed").def_var("a"),
|
|
||||||
# "infix": Concept("a infix b").def_var("a").def_var("b"),
|
|
||||||
# "one": Concept("one")
|
|
||||||
# }
|
|
||||||
sheerka, context, parser = self.init_parser()
|
sheerka, context, parser = self.init_parser()
|
||||||
|
|
||||||
res = parser.parse(context, text)
|
res = parser.parse(context, text)
|
||||||
@@ -1183,13 +1074,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
("one prefixed x$!#", [cnode("__var__0 prefixed", 0, 2, "one prefixed"), utnode(3, 7, " x$!#")]),
|
("one prefixed x$!#", [cnode("__var__0 prefixed", 0, 2, "one prefixed"), utnode(3, 7, " x$!#")]),
|
||||||
])
|
])
|
||||||
def test_i_cannot_parse_when_part_of_the_sequence_is_not_recognized(self, text, expected):
|
def test_i_cannot_parse_when_part_of_the_sequence_is_not_recognized(self, text, expected):
|
||||||
# concepts_map = {
|
|
||||||
# "suffixed": Concept("suffixed a").def_var("a"),
|
|
||||||
# "prefixed": Concept("a prefixed").def_var("a"),
|
|
||||||
# "infix": Concept("a infix b").def_var("a").def_var("b"),
|
|
||||||
# "one": Concept("one"),
|
|
||||||
# "two": Concept("two"),
|
|
||||||
# }
|
|
||||||
sheerka, context, parser = self.init_parser()
|
sheerka, context, parser = self.init_parser()
|
||||||
|
|
||||||
res = parser.parse(context, text)
|
res = parser.parse(context, text)
|
||||||
@@ -1214,13 +1098,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
Atoms concepts, source code or BNF concepts alone are discarded by the lexer
|
Atoms concepts, source code or BNF concepts alone are discarded by the lexer
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
# concepts_map = {
|
|
||||||
# "plus": Concept("a plus b").def_var("a").def_var("b"),
|
|
||||||
# "one": Concept("one"),
|
|
||||||
# "two": Concept("two"),
|
|
||||||
# "three": Concept("three"),
|
|
||||||
# "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
|
|
||||||
# }
|
|
||||||
sheerka, context, parser = self.init_parser()
|
sheerka, context, parser = self.init_parser()
|
||||||
|
|
||||||
res = parser.parse(context, text)
|
res = parser.parse(context, text)
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user