diff --git a/src/core/builtin_helpers.py b/src/core/builtin_helpers.py index fa2fe8c..7539ce2 100644 --- a/src/core/builtin_helpers.py +++ b/src/core/builtin_helpers.py @@ -226,28 +226,28 @@ def only_parsers_results(context, return_values): parents=return_values) -def parse_unrecognized(context, tokens, parsers): +def parse_unrecognized(context, source, parsers): """ - Try to recognize concepts or code from tokens using the given parsers + Try to recognize concepts or code from source using the given parsers :param context: - :param tokens: + :param source: :param parsers: :return: """ steps = [BuiltinConcepts.BEFORE_PARSING, BuiltinConcepts.PARSING, BuiltinConcepts.AFTER_PARSING] sheerka = context.sheerka - with context.push(desc=f"Parsing unrecognized '{tokens}'") as sub_context: + with context.push(desc=f"Parsing unrecognized '{source}'") as sub_context: # disable all parsers but the following ones sub_context.add_preprocess(BaseParser.PREFIX + "*", enabled=False) for parser in parsers: sub_context.add_preprocess(BaseParser.PREFIX + parser, enabled=True) - sub_context.add_inputs(source=tokens) + sub_context.add_inputs(source=source) to_parse = sheerka.ret( context.who, True, - sheerka.new(BuiltinConcepts.USER_INPUT, body=tokens)) + sheerka.new(BuiltinConcepts.USER_INPUT, body=source)) res = sheerka.execute(sub_context, to_parse, steps) sub_context.add_values(return_values=res) diff --git a/src/core/sheerka/services/SheerkaExecute.py b/src/core/sheerka/services/SheerkaExecute.py index f04eb71..c6e0659 100644 --- a/src/core/sheerka/services/SheerkaExecute.py +++ b/src/core/sheerka/services/SheerkaExecute.py @@ -1,23 +1,178 @@ import core.utils +from cache.Cache import Cache from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept from core.sheerka.services.sheerka_service import BaseService +from core.tokenizer import Tokenizer, TokenKind, Keywords, Token NO_MATCH = "** No Match **" +class ParserInput: + """ + Helper class that tokenizes the input once for all + """ + + def __init__(self, text, tokens=None, start=None, end=None, yield_oef=True): + self.text = text + self.tokens = tokens or None + self.length = len(tokens) if tokens else None + self.yield_oef = yield_oef + + self.start = start or 0 + self.end = end + 1 if end else None + self.sub_text = None + self.sub_tokens = None + + self.pos = None + self.token = None + + self.from_tokens = tokens is not None + + def __repr__(self): + from_tokens = "from_tokens" if self.from_tokens else "" + return f"ParserInput({from_tokens}'{self.text}')" + + def reset(self, yield_oef=True): + if self.tokens is None: + self.tokens = list(Tokenizer(self.text)) + self.length = len(self.tokens) + + if self.end is None: + self.end = self.length + + self.yield_oef = yield_oef + self.pos = self.start - 1 + self.token = None + return self + + def as_text(self, custom_switcher=None, tracker=None): + if custom_switcher is None: + if self.sub_text: + return self.sub_text + if self.start == 0 and self.end == self.length: + self.sub_text = self.text + return self.sub_text + self.sub_text = self.get_text_from_tokens(self.tokens[self.start:self.end]) + return self.sub_text + else: + return self.get_text_from_tokens(self.as_tokens(), custom_switcher, tracker) + + def as_tokens(self): + if self.sub_tokens: + return self.sub_tokens + if self.start == 0 and self.end == self.length: + self.sub_tokens = self.tokens + return self.sub_tokens + self.sub_tokens = self.tokens[self.start:self.end] + return self.sub_tokens + + def next_token(self, skip_whitespace=True): + self.pos += 1 + + if self.pos >= self.end: + if self.yield_oef: + self.token = Token(TokenKind.EOF, "", -1, -1, -1) + return False + + self.token = self.tokens[self.pos] + + if self.token.type == TokenKind.EOF and not self.yield_oef: + return False + + if skip_whitespace: + while self.token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE): + self.pos += 1 + if self.pos == self.end: + return False + self.token = self.tokens[self.pos] + + return self.pos < self.end + + def is_empty(self): + if self.text.strip() == "": + return True + + if self.end == self.start: + return True + + if self.end and self.end == self.start + 1 and self.tokens[self.start].type == TokenKind.WHITESPACE: + return True + return False + + @staticmethod + def get_text_from_tokens(tokens, custom_switcher=None, tracker=None): + """ + Create the source code, from the list of token + :param tokens: list of tokens + :param custom_switcher: to override the behaviour (the return value) of some token + :param tracker: keep track of the original token value when custom switched + :return: + """ + if tokens is None: + return "" + res = "" + + if not hasattr(tokens, "__iter__"): + tokens = [tokens] + + switcher = { + TokenKind.KEYWORD: lambda t: Keywords(t.value).value, + TokenKind.CONCEPT: lambda t: core.utils.str_concept(t.value), + } + + if custom_switcher: + switcher.update(custom_switcher) + + for token in tokens: + value = switcher.get(token.type, lambda t: t.value)(token) + res += value + if tracker is not None and token.type in custom_switcher: + tracker[value] = token.value + return res + + class SheerkaExecute(BaseService): """ Manage the execution of a process flow """ NAME = "Execute" + PARSERS_INPUTS_ENTRY = "ParserInput" # entry for admin or internal variables def __init__(self, sheerka): super().__init__(sheerka) - + self.pi_cache = None + def initialize(self): self.sheerka.bind_service_method(self.execute) + self.pi_cache = Cache(default=lambda key: ParserInput(key), max_size=20) + self.sheerka.cache_manager.register_cache(self.PARSERS_INPUTS_ENTRY, self.pi_cache, False) + + def get_parser_input(self, text, tokens=None): + """ + Returns new or existing parser input + :param text: + :param tokens: + :param length: + :return: + """ + + if isinstance(text, ParserInput): + return text + + if tokens is None or self.pi_cache.has(text): + pi = self.pi_cache.get(text) + if pi is None: # when CacheManager.cache_only is True + pi = ParserInput(text) + self.pi_cache.put(text, pi) + return pi + + key = text or ParserInput.get_text_from_tokens(tokens) + pi = ParserInput(key, tokens) + self.pi_cache.put(key, pi) + return pi + def call_parsers(self, context, return_values): # return_values must be a list @@ -56,7 +211,7 @@ class SheerkaExecute(BaseService): for return_value in inputs_for_this_group: - to_parse = return_value.body.body \ + to_parse = self.get_parser_input(return_value.body.body) \ if self.sheerka.isinstance(return_value.body, BuiltinConcepts.USER_INPUT) \ else return_value.body diff --git a/src/core/tokenizer.py b/src/core/tokenizer.py index 6d4be35..67af9e6 100644 --- a/src/core/tokenizer.py +++ b/src/core/tokenizer.py @@ -104,6 +104,9 @@ class Token: else: return str(self.value) + def clone(self): + return Token(self.type, self.value, self.index, self.line, self.column) + @dataclass() class LexerError(Exception): diff --git a/src/evaluators/AddConceptInSetEvaluator.py b/src/evaluators/AddConceptInSetEvaluator.py index 0e182dd..85a368e 100644 --- a/src/evaluators/AddConceptInSetEvaluator.py +++ b/src/evaluators/AddConceptInSetEvaluator.py @@ -1,5 +1,6 @@ import core.builtin_helpers from core.builtin_concepts import ParserResultConcept, BuiltinConcepts +from core.sheerka.services.SheerkaExecute import SheerkaExecute from evaluators.BaseEvaluator import OneReturnValueEvaluator from parsers.DefaultParser import IsaConceptNode @@ -28,10 +29,11 @@ class AddConceptInSetEvaluator(OneReturnValueEvaluator): def eval(self, context, return_value): def _resolve(name_node): + parser_input = context.sheerka.services[SheerkaExecute.NAME].get_parser_input(None, name_node.tokens) ret_val = sheerka.ret( self.name, True, - sheerka.new(BuiltinConcepts.USER_INPUT, body=name_node.tokens, user_name="N/A")) + sheerka.new(BuiltinConcepts.USER_INPUT, body=parser_input, user_name="N/A")) with context.push(desc=f"Recognizing '{name_node}'") as sub_context: r = sheerka.execute(sub_context, ret_val, ALL_STEPS) diff --git a/src/evaluators/LexerNodeEvaluator.py b/src/evaluators/LexerNodeEvaluator.py index 4ccb348..593d810 100644 --- a/src/evaluators/LexerNodeEvaluator.py +++ b/src/evaluators/LexerNodeEvaluator.py @@ -7,7 +7,7 @@ from parsers.PythonParser import LexerNodeParserHelperForPython, PythonNode class LexerNodeEvaluator(OneReturnValueEvaluator): """ - After a BNF is recognized, generates the concept or the list concepts + Evaluate a list of LexerNode (ConceptNode | SourceCodeNode | UnrecognizedTokenNode...) """ NAME = "LexerNode" diff --git a/src/parsers/AtomNodeParser.py b/src/parsers/AtomNodeParser.py index 465054e..a2f73ac 100644 --- a/src/parsers/AtomNodeParser.py +++ b/src/parsers/AtomNodeParser.py @@ -3,6 +3,7 @@ from dataclasses import dataclass from core import builtin_helpers from core.builtin_concepts import BuiltinConcepts from core.concept import DEFINITION_TYPE_BNF, Concept +from core.sheerka.services.SheerkaExecute import ParserInput from core.tokenizer import Tokenizer from core.utils import strip_tokens from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensNode, SourceCodeNode @@ -250,26 +251,27 @@ class AtomNodeParser(BaseNodeParser): concept_parser_helpers = [AtomConceptParserHelper(self.context)] - while self.next_token(False): + while self.parser_input.next_token(False): for concept_parser in concept_parser_helpers: concept_parser.reset() - token = self.token + token = self.parser_input.token + pos = self.parser_input.pos try: for concept_parser in concept_parser_helpers: - if concept_parser.eat_token(self.token, self.pos): + if concept_parser.eat_token(token, pos): concept_parser.lock() concepts = self.get_concepts(token, self._is_eligible, custom=_get_concepts_by_name) if not concepts: for concept_parser in concept_parser_helpers: - concept_parser.eat_unrecognized(token, self.pos) + concept_parser.eat_unrecognized(token, pos) continue if len(concepts) == 1: for concept_parser in concept_parser_helpers: - concept_parser.eat_concept(concepts[0], self.pos) + concept_parser.eat_concept(concepts[0], pos) continue # make the cartesian product @@ -284,7 +286,7 @@ class AtomNodeParser(BaseNodeParser): for concept in concepts: clone = concept_parser.clone() temp_res.append(clone) - clone.eat_concept(concept, self.pos) + clone.eat_concept(concept, pos) concept_parser_helpers = temp_res finally: @@ -298,22 +300,26 @@ class AtomNodeParser(BaseNodeParser): return concept_parser_helpers - def get_by_name(self, parser_input): + def get_by_name(self): """ Try to recognize the full parser input as a concept name :return: """ - source = self.get_input_as_text(parser_input) + source = self.parser_input.as_text() concepts = self.sheerka.get_by_name(source.strip()) if not self.sheerka.is_known(concepts): return None concepts = [concepts] if isinstance(concepts, Concept) else concepts res = [] - start, end = self.get_tokens_boundaries(self.tokens) + start, end = self.get_tokens_boundaries(self.parser_input.as_tokens()) for concept in concepts: parser_helper = AtomConceptParserHelper(None) - parser_helper.sequence.append(ConceptNode(concept, start, end, strip_tokens(self.tokens, True), source)) + parser_helper.sequence.append(ConceptNode( + concept, + start, + end, + strip_tokens(self.parser_input.as_tokens(), True), source)) res.append(parser_helper) return res @@ -331,7 +337,7 @@ class AtomNodeParser(BaseNodeParser): if isinstance(node, ConceptNode): if len(node.concept.metadata.variables) > 0: node.concept.metadata.is_evaluated = True # Do not try to evaluate those concepts - node.tokens = self.tokens[node.start:node.end + 1] + node.tokens = self.parser_input.tokens[node.start:node.end + 1] node.fix_source() if parser_helper in valid_parser_helpers: @@ -341,8 +347,8 @@ class AtomNodeParser(BaseNodeParser): return valid_parser_helpers - def parse(self, context, parser_input): - if parser_input == "": + def parse(self, context, parser_input: ParserInput): + if parser_input.is_empty(): return context.sheerka.ret( self.name, False, @@ -356,7 +362,7 @@ class AtomNodeParser(BaseNodeParser): context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink)) sequences = self.get_concepts_sequences() - if by_name := self.get_by_name(parser_input): + if by_name := self.get_by_name(): sequences.extend(by_name) parser_helpers = self.get_valid(sequences) @@ -386,4 +392,4 @@ class AtomNodeParser(BaseNodeParser): return self.sheerka.ret( self.name, False, - context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input)) + context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.as_text())) diff --git a/src/parsers/BaseNodeParser.py b/src/parsers/BaseNodeParser.py index 2b62af3..3ee27f8 100644 --- a/src/parsers/BaseNodeParser.py +++ b/src/parsers/BaseNodeParser.py @@ -5,7 +5,7 @@ from enum import Enum import core.utils from core.builtin_concepts import BuiltinConcepts from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF, ConceptParts -from core.sheerka.ExecutionContext import ExecutionContext +from core.sheerka.services.SheerkaExecute import ParserInput from core.tokenizer import TokenKind, LexerError, Token, Keywords from parsers.BaseParser import Node, BaseParser, ErrorNode @@ -86,7 +86,6 @@ class UnrecognizedTokensNode(LexerNode): else: self.end -= 1 - def has_open_paren(self): return self.parenthesis_count > 0 @@ -598,13 +597,13 @@ class BaseNodeParser(BaseParser): else: self.concepts_by_first_keyword = None - self.token = None - self.pos = -1 - self.tokens = None - - self.context: ExecutionContext = None - self.text = None - self.sheerka = None + # self.token = None + # self.pos = -1 + # self.tokens = None + # + # self.context: ExecutionContext = None + # self.text = None + # self.sheerka = None def init_from_concepts(self, context, concepts, **kwargs): """ @@ -617,43 +616,48 @@ class BaseNodeParser(BaseParser): concepts_by_first_keyword = self.get_concepts_by_first_keyword(context, concepts).body self.concepts_by_first_keyword = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword).body - def reset_parser(self, context, text): + def reset_parser(self, context, parser_input: ParserInput): self.context = context self.sheerka = context.sheerka - self.text = text - + self.parser_input = parser_input try: - self.tokens = list(self.get_input_as_tokens(text)) + self.parser_input.reset(False) except LexerError as e: self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False) return False - - self.token = None - self.pos = -1 return True + # self.text = text + # + # try: + # self.tokens = list(self.get_input_as_tokens(text)) + # + # + # self.token = None + # self.pos = -1 + # return True - def add_error(self, error, next_token=True): - self.error_sink.append(error) - if next_token: - self.next_token() - return error + # def add_error(self, error, next_token=True): + # self.error_sink.append(error) + # if next_token: + # self.parser_input.next_token() + # return error - def get_token(self) -> Token: - return self.token - - def next_token(self, skip_whitespace=True): - if self.token and self.token.type == TokenKind.EOF: - return False - - self.pos += 1 - self.token = self.tokens[self.pos] - - if skip_whitespace: - while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE: - self.pos += 1 - self.token = self.tokens[self.pos] - - return self.token.type != TokenKind.EOF + # def get_token(self) -> Token: + # return self.token + # + # def next_token(self, skip_whitespace=True): + # if self.token and self.token.type == TokenKind.EOF: + # return False + # + # self.pos += 1 + # self.token = self.tokens[self.pos] + # + # if skip_whitespace: + # while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE: + # self.pos += 1 + # self.token = self.tokens[self.pos] + # + # return self.token.type != TokenKind.EOF def get_concepts(self, token, to_keep, custom=None, to_map=None, strip_quotes=False): """ diff --git a/src/parsers/BaseParser.py b/src/parsers/BaseParser.py index ad58427..dcf32c5 100644 --- a/src/parsers/BaseParser.py +++ b/src/parsers/BaseParser.py @@ -9,6 +9,25 @@ from core.sheerka_logger import get_logger from core.tokenizer import TokenKind, Keywords, Token, Tokenizer +# # keep a cache for the parser input +# pi_cache = Cache(default=lambda key: ParserInput(key), max_size=20) +# +# +# def get_parser_input(text, tokens=None, length=None): +# """ +# Returns new or existing parser input +# :param text: +# :param tokens: +# :param length: +# :return: +# """ +# if tokens is None or pi_cache.has(text): +# return pi_cache.get(text) +# pi = ParserInput(text, tokens, length) +# pi_cache.put(text, pi) +# return pi + + @dataclass() class Node: pass @@ -84,6 +103,9 @@ class BaseParser: self.enabled = enabled self.error_sink = [] + self.context: ExecutionContext = None + self.sheerka = None + self.parser_input: ParserInput = None def __eq__(self, other): if not isinstance(other, self.__class__): @@ -99,6 +121,12 @@ class BaseParser: def parse(self, context, parser_input): pass + def add_error(self, error, next_token=True): + self.error_sink.append(error) + if next_token: + self.parser_input.next_token() + return error + @property def has_error(self): return len(self.error_sink) > 0 diff --git a/src/parsers/BnfNodeParser.py b/src/parsers/BnfNodeParser.py index dd2d24d..dd44cf7 100644 --- a/src/parsers/BnfNodeParser.py +++ b/src/parsers/BnfNodeParser.py @@ -14,6 +14,7 @@ from cache.Cache import Cache from core import builtin_helpers from core.builtin_concepts import BuiltinConcepts from core.concept import Concept, DEFINITION_TYPE_BNF, DoNotResolve, ConceptParts +from core.sheerka.services.SheerkaExecute import ParserInput from core.tokenizer import Tokenizer, Token, TokenKind from parsers.BaseNodeParser import BaseNodeParser, LexerNode, UnrecognizedTokensNode, ConceptNode, GrammarErrorNode from parsers.BaseParser import ErrorNode @@ -149,7 +150,7 @@ class ConceptExpression(ParsingExpression): return NonTerminalNode(self, node.start, node.end, - parser_helper.parser.tokens[node.start: node.end + 1], + parser_helper.parser.parser_input.tokens[node.start: node.end + 1], [node]) @@ -184,7 +185,11 @@ class Sequence(ParsingExpression): children.append(node) end_pos = node.end - return NonTerminalNode(self, init_pos, end_pos, parser_helper.parser.tokens[init_pos: end_pos + 1], children) + return NonTerminalNode(self, + init_pos, + end_pos, + parser_helper.parser.parser_input.tokens[init_pos: end_pos + 1], + children) def __repr__(self): to_str = ", ".join(repr(n) for n in self.elements) @@ -206,7 +211,7 @@ class OrderedChoice(ParsingExpression): return NonTerminalNode(self, init_pos, node.end, - parser_helper.parser.tokens[init_pos: node.end + 1], + parser_helper.parser.parser_input.tokens[init_pos: node.end + 1], [node]) parser_helper.seek(init_pos) # backtrack @@ -237,7 +242,7 @@ class Optional(ParsingExpression): self, node.start, node.end, - parser_helper.parser.tokens[node.start: node.end + 1], + parser_helper.parser.parser_input.tokens[node.start: node.end + 1], [node]) parser_helper.seek(init_pos) # backtrack @@ -303,7 +308,8 @@ class ZeroOrMore(Repetition): if len(children) == 0: return NonTerminalNode(self, init_pos, -1, [], []) - return NonTerminalNode(self, init_pos, end_pos, parser_helper.parser.tokens[init_pos: end_pos + 1], children) + return NonTerminalNode(self, init_pos, end_pos, parser_helper.parser.parser_input.tokens[init_pos: end_pos + 1], + children) def __repr__(self): to_str = ", ".join(repr(n) for n in self.elements) @@ -343,7 +349,11 @@ class OneOrMore(Repetition): if len(children) == 0: # if nothing is found, it's an error return None - return NonTerminalNode(self, init_pos, end_pos, parser_helper.parser.tokens[init_pos: end_pos + 1], children) + return NonTerminalNode(self, + init_pos, + end_pos, + parser_helper.parser.parser_input.tokens[init_pos: end_pos + 1], + children) def __repr__(self): to_str = ", ".join(repr(n) for n in self.elements) @@ -507,24 +517,24 @@ class BnfConceptParserHelper: return False self.pos += 1 - self.token = self.parser.tokens[self.pos] + self.token = self.parser.parser_input.tokens[self.pos] if skip_whitespace: while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE: self.pos += 1 - self.token = self.parser.tokens[self.pos] + self.token = self.parser.parser_input.tokens[self.pos] return self.token.type != TokenKind.EOF def seek(self, pos): self.pos = pos - self.token = self.parser.tokens[self.pos] + self.token = self.parser.parser_input.tokens[self.pos] def has_error(self): return len(self.errors) > 0 def is_locked(self): - return self.parser.pos <= self.pos or self.has_error() + return self.parser.parser_input.pos <= self.pos or self.has_error() def eat_concept(self, concept, token): if self.is_locked(): @@ -546,8 +556,8 @@ class BnfConceptParserHelper: self.errors.append(GrammarErrorNode(error_msg)) return - self.pos = self.parser.pos - self.token = self.parser.tokens[self.pos] + self.pos = self.parser.parser_input.pos + self.token = self.parser.parser_input.tokens[self.pos] # parse node = parsing_expression.parse(self) @@ -557,15 +567,15 @@ class BnfConceptParserHelper: self.bnf_parsed = True else: self.debug.append(("Rewind", token)) - self.unrecognized_tokens.add_token(token, self.parser.pos) - self.pos = self.parser.pos # reset position + self.unrecognized_tokens.add_token(token, self.parser.parser_input.pos) + self.pos = self.parser.parser_input.pos # reset position def eat_unrecognized(self, token): if self.is_locked(): return self.debug.append(token) - self.unrecognized_tokens.add_token(token, self.parser.pos) + self.unrecognized_tokens.add_token(token, self.parser.parser_input.pos) def manage_unrecognized(self): if self.unrecognized_tokens.is_empty(): @@ -631,7 +641,7 @@ class BnfConceptParserHelper: concept, underlying.start, underlying.end, - self.parser.tokens[underlying.start: underlying.end + 1], + self.parser.parser_input.tokens[underlying.start: underlying.end + 1], None, underlying) return concept_node @@ -779,9 +789,9 @@ class BnfNodeParser(BaseNodeParser): concept_parser_helpers = [BnfConceptParserHelper(self)] - while self.next_token(False): + while self.parser_input.next_token(False): - token = self.get_token() + token = self.parser_input.token try: concepts = self.get_concepts(token, self._is_eligible, strip_quotes=False) @@ -837,7 +847,7 @@ class BnfNodeParser(BaseNodeParser): resolved = self.resolve_parsing_expression(expression, already_seen or set()) sub_context.add_values(return_values=resolved) - self.concepts_grammars.put(concept.id, resolved) + self.concepts_grammars.put(concept.id, resolved) if self.has_error: return None @@ -929,7 +939,7 @@ class BnfNodeParser(BaseNodeParser): return self.context.concepts[concept] return self.sheerka.get_by_key(concept) - def parse(self, context, parser_input): + def parse(self, context, parser_input: ParserInput): """ parser_input can be string, but text can also be an list of tokens :param context: @@ -940,11 +950,11 @@ class BnfNodeParser(BaseNodeParser): context.log(f"Parsing '{parser_input}' with BnfNode", self.name) sheerka = context.sheerka - if parser_input == "" or isinstance(parser_input, list) and len(parser_input) == 0: + if parser_input.is_empty(): return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.NOT_FOR_ME, - body=parser_input, + body=parser_input.as_text(), reason=BuiltinConcepts.IS_EMPTY)) if not self.reset_parser(context, parser_input): @@ -966,7 +976,7 @@ class BnfNodeParser(BaseNodeParser): return self.sheerka.ret( self.name, False, - context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input)) + context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.as_text())) ret = [] for parser_helper in valid_parser_helpers: @@ -977,13 +987,13 @@ class BnfNodeParser(BaseNodeParser): self.sheerka.new( BuiltinConcepts.PARSER_RESULT, parser=self, - source=parser_input, + source=parser_input.as_text(), body=parser_helper.sequence, try_parsed=parser_helper.sequence))) if len(ret) == 1: - self.log_result(context, parser_input, ret[0]) + self.log_result(context, parser_input.as_text(), ret[0]) return ret[0] else: - self.log_multiple_results(context, parser_input, ret) + self.log_multiple_results(context, parser_input.as_text(), ret) return ret diff --git a/src/parsers/BnfParser.py b/src/parsers/BnfParser.py index 8382b35..79dd750 100644 --- a/src/parsers/BnfParser.py +++ b/src/parsers/BnfParser.py @@ -115,7 +115,6 @@ class BnfParser(BaseParser): def parse(self, context: ExecutionContext, parser_input): - tree = None try: self.reset_parser(context, parser_input) tree = self.parse_choice() @@ -124,7 +123,10 @@ class BnfParser(BaseParser): if token and token.type != TokenKind.EOF: self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", token, [])) except LexerError as e: - self.add_error(e, False) + return self.sheerka.ret( + self.name, + False, + context.sheerka.new(BuiltinConcepts.ERROR, body=[e])) value = self.get_return_value_body(context.sheerka, self.source, tree, tree) @@ -283,4 +285,3 @@ class BnfParser(BaseParser): expression.rule_name = token.value self.next_token() return expression - diff --git a/src/parsers/DefaultParser.py b/src/parsers/DefaultParser.py index e250d1b..6093c48 100644 --- a/src/parsers/DefaultParser.py +++ b/src/parsers/DefaultParser.py @@ -1,12 +1,13 @@ -from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, ParserResultConcept -from core.concept import ConceptParts, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF +from dataclasses import dataclass, field + import core.builtin_helpers import core.utils +from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, ParserResultConcept +from core.concept import ConceptParts, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF +from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute +from core.tokenizer import Tokenizer, TokenKind, Keywords from parsers.BaseParser import BaseParser, Node, ErrorNode, NotInitializedNode -from core.tokenizer import Tokenizer, TokenKind, Token, Keywords -from dataclasses import dataclass, field from parsers.BnfParser import BnfParser -from core.sheerka.Sheerka import ExecutionContext @dataclass() @@ -90,11 +91,10 @@ class DefConceptNode(DefaultParserNode): asts = {} for part_key in ConceptParts: prop_value = getattr(self, part_key.value) - if isinstance(prop_value, ReturnValueConcept) and isinstance(prop_value.body, - ParserResultConcept) and hasattr( - prop_value.body.body, "ast_"): + if isinstance(prop_value, ReturnValueConcept) and \ + isinstance(prop_value.body, ParserResultConcept) and \ + hasattr(prop_value.body.body, "ast_"): asts[part_key] = prop_value - # asts[part_key] = prop_value.body.body.ast_ return asts @@ -111,11 +111,6 @@ class DefaultParser(BaseParser): def __init__(self, **kwargs): BaseParser.__init__(self, "Default", 60) - self.lexer_iter = None - self._current = None - self.context: ExecutionContext = None - self.text = None - self.sheerka = None @staticmethod def fix_indentation(tokens): @@ -129,6 +124,7 @@ class DefaultParser(BaseParser): :param tokens: :return: """ + tokens = tokens.copy() # do not modify ParserInput.tokens if tokens[0].type != TokenKind.COLON: return tokens @@ -143,6 +139,8 @@ class DefaultParser(BaseParser): indent_size = len(tokens[2].value) # now fix the other indentations + # KSI 23/05/2020 Not quite sure this 'fixing' stuff is still relevant, + # as I now have an editor in interactive mode i = 3 while i < len(tokens) - 1: if tokens[i].type == TokenKind.NEWLINE: @@ -152,44 +150,22 @@ class DefaultParser(BaseParser): if len(tokens[i + 1].value) < indent_size: return SyntaxErrorNode([tokens[i + 1]], "Invalid indentation.") + tokens[i + 1] = tokens[i + 1].clone() tokens[i + 1].value = " " * (len(tokens[i + 1].value) - indent_size) i += 1 return tokens[3:] - def reset_parser(self, context, text): + def reset_parser(self, context, parser_input): self.context = context self.sheerka = context.sheerka + self.parser_input = parser_input + self.parser_input.reset() + self.parser_input.next_token() - self.text = text - self.lexer_iter = iter(Tokenizer(text)) - self._current = None - - self.next_token() - - def add_error(self, error, next_token=True): - self.error_sink.append(error) - if next_token: - self.next_token() - return error - - def get_token(self) -> Token: - return self._current - - def next_token(self, skip_whitespace=True): - try: - self._current = next(self.lexer_iter) - if skip_whitespace: - while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE: - self._current = next(self.lexer_iter) - except StopIteration: - self._current = None - - return - - def parse(self, context, parser_input): + def parse(self, context, parser_input: ParserInput): # default parser can only manage string text - if not isinstance(parser_input, str): + if parser_input.from_tokens: ret = context.sheerka.ret( self.name, False, @@ -197,12 +173,14 @@ class DefaultParser(BaseParser): self.log_result(context, parser_input, ret) return ret - tree = None try: self.reset_parser(context, parser_input) tree = self.parse_statement() except core.tokenizer.LexerError as e: - self.add_error(e, False) + return self.sheerka.ret( + self.name, + False, + context.sheerka.new(BuiltinConcepts.ERROR, body=[e])) # If a error is found it must be sent to error_sink # tree must contain what was recognized @@ -210,26 +188,20 @@ class DefaultParser(BaseParser): if self.has_error and isinstance(self.error_sink[0], CannotHandleErrorNode): body = self.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=self.error_sink) else: - body = self.get_return_value_body(context.sheerka, parser_input, tree, tree) - # body = self.sheerka.new( - # BuiltinConcepts.PARSER_RESULT, - # parser=self, - # source=text, - # body=self.error_sink if self.has_error else tree, - # try_parsed=tree) + body = self.get_return_value_body(context.sheerka, parser_input.as_text(), tree, tree) ret = self.sheerka.ret( self.name, not self.has_error, body) - self.log_result(context, parser_input, ret) + self.log_result(context, parser_input.as_text(), ret) return ret def parse_statement(self): - token = self.get_token() + token = self.parser_input.token if token.value == Keywords.DEF: - self.next_token() + self.parser_input.next_token() self.context.log("Keyword DEF found.", self.name) return self.parse_def_concept(token) else: @@ -282,23 +254,23 @@ class DefaultParser(BaseParser): return concept_name keyword = [] - token = self.get_token() + token = self.parser_input.token if token.value != Keywords.ISA: return self.add_error(CannotHandleErrorNode([token], "")) keyword.append(token) - self.next_token() + self.parser_input.next_token() set_name = self.parse_concept_name() return IsaConceptNode(keyword, concept_name, set_name) def parse_concept_name(self): tokens = [] - token = self.get_token() + token = self.parser_input.token while not (token.type == TokenKind.EOF or token.type == TokenKind.KEYWORD): tokens.append(token) - self.next_token() - token = self.get_token() + self.parser_input.next_token() + token = self.parser_input.token if len(tokens) == 0: return self.add_error(UnexpectedTokenErrorNode([token], "Unexpected token", [])) @@ -319,7 +291,7 @@ class DefaultParser(BaseParser): Keywords.POST: None, } current_part = Keywords.CONCEPT - token = self.get_token() + token = self.parser_input.token first_token = token # loop thru the tokens, and put them in the correct tokens_found_by_parts entry @@ -334,18 +306,18 @@ class DefaultParser(BaseParser): else: tokens_found_by_parts[keyword] = [token] current_part = keyword - self.next_token() + self.parser_input.next_token() else: tokens_found_by_parts[current_part].append(token) - self.next_token(False) + self.parser_input.next_token(False) - token = self.get_token() + token = self.parser_input.token return first_token, tokens_found_by_parts def get_concept_name(self, first_token, tokens_found_by_parts): name_first_token_index = 1 - token = self.get_token() + token = self.parser_input.token if first_token.value != Keywords.CONCEPT: self.add_error(UnexpectedTokenErrorNode([token], "Syntax error.", [Keywords.CONCEPT])) name_first_token_index = 0 @@ -431,10 +403,11 @@ class DefaultParser(BaseParser): # ask the other parsers if they recognize the tokens with self.context.push(self.name, desc=f"Parsing {keyword}") as sub_context: + parser_input = self.sheerka.services[SheerkaExecute.NAME].get_parser_input(None, tokens) to_parse = self.sheerka.ret( sub_context.who, True, - self.sheerka.new(BuiltinConcepts.USER_INPUT, body=tokens)) + self.sheerka.new(BuiltinConcepts.USER_INPUT, body=parser_input)) steps = [BuiltinConcepts.PARSING] parsed = self.sheerka.execute(sub_context, to_parse, steps) parsing_result = core.builtin_helpers.expect_one(sub_context, parsed) diff --git a/src/parsers/EmptyStringParser.py b/src/parsers/EmptyStringParser.py index 7663377..46f6a15 100644 --- a/src/parsers/EmptyStringParser.py +++ b/src/parsers/EmptyStringParser.py @@ -1,4 +1,5 @@ from core.builtin_concepts import BuiltinConcepts +from core.sheerka.services.SheerkaExecute import ParserInput from parsers.BaseParser import BaseParser @@ -10,12 +11,10 @@ class EmptyStringParser(BaseParser): def __init__(self, **kwargs): BaseParser.__init__(self, "EmptyString", 90) - def parse(self, context, parser_input): + def parse(self, context, parser_input: ParserInput): sheerka = context.sheerka - if isinstance(parser_input, str) and parser_input.strip() == "" or \ - isinstance(parser_input, list) and parser_input == [] or \ - parser_input is None: + if parser_input.is_empty(): ret = sheerka.ret(self.name, True, sheerka.new( BuiltinConcepts.PARSER_RESULT, parser=self, diff --git a/src/parsers/ExactConceptParser.py b/src/parsers/ExactConceptParser.py index 35338b8..453bba8 100644 --- a/src/parsers/ExactConceptParser.py +++ b/src/parsers/ExactConceptParser.py @@ -1,11 +1,12 @@ import logging +import core.builtin_helpers from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts from core.concept import VARIABLE_PREFIX +from core.sheerka.services.SheerkaExecute import ParserInput from core.tokenizer import Keywords, TokenKind, LexerError from core.utils import str_concept from parsers.BaseParser import BaseParser -import core.builtin_helpers class ExactConceptParser(BaseParser): @@ -19,7 +20,7 @@ class ExactConceptParser(BaseParser): BaseParser.__init__(self, "ExactConcept", 80) self.max_word_size = max_word_size - def parse(self, context, parser_input): + def parse(self, context, parser_input: ParserInput): """ text can be string, but text can also be an list of tokens :param context: @@ -31,6 +32,7 @@ class ExactConceptParser(BaseParser): sheerka = context.sheerka try: + parser_input.reset() words = self.get_words(parser_input) except LexerError as e: context.log(f"Error found in tokenizer {e}", self.name) @@ -38,8 +40,8 @@ class ExactConceptParser(BaseParser): if len(words) > (self.max_word_size or self.MAX_WORDS_SIZE): context.log(f"Max words reached. Stopping.", self.name) - too_long = sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, body=parser_input) - body = sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input, reason=too_long) + too_long = sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, body=parser_input.as_text()) + body = sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.as_text(), reason=too_long) return sheerka.ret(self.name, False, body) already_recognized = [] # keep track of the concepts founds @@ -78,12 +80,13 @@ class ExactConceptParser(BaseParser): already_recognized.append(concept) - by_name = sheerka.resolve(self.get_input_as_text(parser_input)) + by_name = sheerka.resolve(parser_input.as_text()) core.builtin_helpers.set_is_evaluated(by_name) recognized = self.merge_concepts(already_recognized, by_name) if len(recognized) == 0: - ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=parser_input)) + ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, + body=parser_input.as_text())) self.log_result(context, parser_input, ret) return ret else: @@ -94,10 +97,10 @@ class ExactConceptParser(BaseParser): self.log_multiple_results(context, parser_input, res) return res - def get_words(self, text): - tokens = self.get_input_as_tokens(text) + @staticmethod + def get_words(parser_input): res = [] - for t in tokens: + for t in parser_input.as_tokens(): if t.type == TokenKind.EOF: break if t.type == TokenKind.NEWLINE or t.type == TokenKind.WHITESPACE: @@ -173,6 +176,6 @@ class ExactConceptParser(BaseParser): context.sheerka.new( BuiltinConcepts.PARSER_RESULT, parser=self, - source=parser_input if isinstance(parser_input, str) else self.get_text_from_tokens(parser_input), + source=parser_input.as_text(), body=concept, try_parsed=concept)) diff --git a/src/parsers/PythonParser.py b/src/parsers/PythonParser.py index ffa717d..1f652a5 100644 --- a/src/parsers/PythonParser.py +++ b/src/parsers/PythonParser.py @@ -4,6 +4,7 @@ from dataclasses import dataclass import core.utils from core.builtin_concepts import BuiltinConcepts +from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute from core.tokenizer import LexerError, TokenKind from parsers.BaseParser import BaseParser, Node, ErrorNode from parsers.BnfNodeParser import ConceptNode @@ -28,7 +29,7 @@ class PythonNode(Node): self.concepts = concepts or {} # when concepts are recognized in the expression # def __repr__(self): - # return "PythonNode(source='" + self.source + "', ast=" + self.get_dump(self.ast_) + ")" + # return "PythonNode(parser_input='" + self.parser_input + "', ast=" + self.get_dump(self.ast_) + ")" def __repr__(self): ast_type = "expr" if isinstance(self.ast_, ast.Expression) else "module" @@ -57,77 +58,6 @@ class PythonNode(Node): return dump -class PythonParser(BaseParser): - """ - Parse Python scripts - """ - - def __init__(self, **kwargs): - - BaseParser.__init__(self, "Python", 50) - self.source = kwargs.get("source", "") - - def parse(self, context, parser_input): - sheerka = context.sheerka - tree = None - - python_switcher = { - TokenKind.CONCEPT: lambda t: core.utils.encode_concept(t.value) - } - - try: - tracker = {} - source = self.get_input_as_text(parser_input, python_switcher, tracker) - source = source.strip() - parser_input = parser_input if isinstance(parser_input, str) else source - - # first, try to parse an expression - res, tree, error = self.try_parse_expression(source) - if not res: - # then try to parse a statement - res, tree, error = self.try_parse_statement(source) - if not res: - error_node = PythonErrorNode(parser_input, error) - self.error_sink.append(error_node) - - except LexerError as e: - self.error_sink.append(e) - - if self.has_error: - ret = sheerka.ret( - self.name, - False, - sheerka.new( - BuiltinConcepts.NOT_FOR_ME, - body=parser_input, - reason=self.error_sink)) - else: - ret = sheerka.ret( - self.name, - True, - sheerka.new( - BuiltinConcepts.PARSER_RESULT, - parser=self, - source=parser_input, - body=PythonNode(parser_input, tree, tracker), - try_parsed=None)) - - self.log_result(context, parser_input, ret) - return ret - - def try_parse_expression(self, text): - try: - return True, ast.parse(text, f"<{self.source}>", 'eval'), None - except Exception as error: - return False, None, error - - def try_parse_statement(self, text): - try: - return True, ast.parse(text, f"<{self.source}>", 'exec'), None - except Exception as error: - return False, None, error - - class PythonGetNamesVisitor(ast.NodeVisitor): """ This visitor will find all the name declared in the ast @@ -206,7 +136,8 @@ class LexerNodeParserHelperForPython: with context.push(self, desc="Trying Python for '" + to_parse + "'") as sub_context: sub_context.add_inputs(to_parse=to_parse) python_parser = PythonParser() - result = python_parser.parse(sub_context, to_parse) + parser_input = context.sheerka.services[SheerkaExecute.NAME].get_parser_input(to_parse) + result = python_parser.parse(sub_context, parser_input) sub_context.add_values(return_values=result) if result.status: @@ -216,3 +147,75 @@ class LexerNodeParserHelperForPython: return python_node return result.body # the error + + +class PythonParser(BaseParser): + """ + Parse Python scripts + """ + + def __init__(self, **kwargs): + + BaseParser.__init__(self, "Python", 50) + self.source = kwargs.get("source", "") + + def parse(self, context, parser_input: ParserInput): + sheerka = context.sheerka + tree = None + tracker = {} # to keep track of concept tokens (c:xxx:) + + python_switcher = { + TokenKind.CONCEPT: lambda t: core.utils.encode_concept(t.value) + } + + try: + parser_input.reset() + + source_code = parser_input.as_text(python_switcher, tracker) + source_code = source_code.strip() + + # first, try to parse an expression + res, tree, error = self.try_parse_expression(source_code) + if not res: + # then try to parse a statement + res, tree, error = self.try_parse_statement(source_code) + if not res: + error_node = PythonErrorNode(parser_input.as_text(), error) + self.error_sink.append(error_node) + + except LexerError as e: + self.error_sink.append(e) + + if self.has_error: + ret = sheerka.ret( + self.name, + False, + sheerka.new( + BuiltinConcepts.NOT_FOR_ME, + body=parser_input.as_text(), + reason=self.error_sink)) + else: + ret = sheerka.ret( + self.name, + True, + sheerka.new( + BuiltinConcepts.PARSER_RESULT, + parser=self, + source=parser_input.as_text(), + body=PythonNode(source_code, tree, tracker), + try_parsed=None)) + + self.log_result(context, parser_input.as_text(), ret) + return ret + + def try_parse_expression(self, text): + try: + return True, ast.parse(text, f"<{self.source}>", 'eval'), None + except Exception as error: + return False, None, error + + def try_parse_statement(self, text): + try: + return True, ast.parse(text, f"<{self.source}>", 'exec'), None + except Exception as error: + return False, None, error diff --git a/src/parsers/PythonWithConceptsParser.py b/src/parsers/PythonWithConceptsParser.py index 52b939a..92dc4f9 100644 --- a/src/parsers/PythonWithConceptsParser.py +++ b/src/parsers/PythonWithConceptsParser.py @@ -1,4 +1,5 @@ from core.builtin_concepts import BuiltinConcepts +from core.sheerka.services.SheerkaExecute import SheerkaExecute from parsers.BaseParser import BaseParser from parsers.BnfNodeParser import ConceptNode from parsers.PythonParser import PythonParser @@ -77,8 +78,9 @@ class PythonWithConceptsParser(BaseParser): to_parse += node.source with context.push(self, "Trying Python for '" + to_parse + "'") as sub_context: + parser_input = context.sheerka.services[SheerkaExecute.NAME].get_parser_input(to_parse) python_parser = PythonParser() - result = python_parser.parse(sub_context, to_parse) + result = python_parser.parse(sub_context, parser_input) if result.status: python_node = result.body.body diff --git a/src/parsers/SyaNodeParser.py b/src/parsers/SyaNodeParser.py index d81c70d..ed8ebeb 100644 --- a/src/parsers/SyaNodeParser.py +++ b/src/parsers/SyaNodeParser.py @@ -6,7 +6,7 @@ from typing import List from core import builtin_helpers from core.builtin_concepts import BuiltinConcepts from core.concept import Concept, DEFINITION_TYPE_BNF -from core.sheerka.ExecutionContext import ExecutionContext +from core.sheerka.services.SheerkaExecute import ParserInput from core.tokenizer import Token, TokenKind, Tokenizer from parsers.BaseNodeParser import UnrecognizedTokensNode, ConceptNode, SourceCodeNode, SyaAssociativity, \ SourceCodeWithConceptNode, BaseNodeParser @@ -313,21 +313,6 @@ class InFixToPostFix: """ return len(self.stack) > 0 and isinstance(self.stack[-1], type) - def _get_lexer_nodes_from_unrecognized(self): - """ - Use the source of self.unrecognized_tokens gto find concepts or source code - :return: - """ - - res = builtin_helpers.parse_unrecognized(self.context, self.unrecognized_tokens.source, PARSERS) - only_parsers_results = builtin_helpers.only_parsers_results(self.context, res) - if not only_parsers_results.status: - return None - - return builtin_helpers.get_lexer_nodes( - only_parsers_results.body.body, - self.unrecognized_tokens.start, - self.unrecognized_tokens.tokens) def _make_source_code_with_concept(self, start, rpar_token, end): """ @@ -440,7 +425,10 @@ class InFixToPostFix: self.unrecognized_tokens.fix_source() # try to recognize concepts - nodes_sequences = self._get_lexer_nodes_from_unrecognized() + nodes_sequences = builtin_helpers.get_lexer_nodes_from_unrecognized( + self.context, + self.unrecognized_tokens, + PARSERS) if nodes_sequences: # There are more than one solution found @@ -482,7 +470,10 @@ class InFixToPostFix: :return: list of function_parser_res """ self.unrecognized_tokens.fix_source() - nodes_sequences = self._get_lexer_nodes_from_unrecognized() + nodes_sequences = builtin_helpers.get_lexer_nodes_from_unrecognized( + self.context, + self.unrecognized_tokens, + PARSERS) if nodes_sequences is None: return None @@ -908,13 +899,13 @@ class SyaNodeParser(BaseNodeParser): self.concepts_by_first_keyword = {} self.sya_definitions = {} - self.token = None - self.pos = -1 - self.tokens = None - - self.context: ExecutionContext = None - self.text = None - self.sheerka = None + # self.token = None + # self.pos = -1 + # self.tokens = None + # + # self.context: ExecutionContext = None + # self.text = None + # self.sheerka = None def init_from_concepts(self, context, concepts, **kwargs): super().init_from_concepts(context, concepts) @@ -954,15 +945,15 @@ class SyaNodeParser(BaseNodeParser): return sya_concept_def - def infix_to_postfix(self, context, text): + def infix_to_postfix(self, context, parser_input: ParserInput): """ Implementing Shunting Yard Algorithm :param context: - :param text: + :param parser_input: :return: """ - if not self.reset_parser(context, text): + if not self.reset_parser(context, parser_input): return None forked = [] @@ -978,32 +969,32 @@ class SyaNodeParser(BaseNodeParser): forked.clear() res = [InFixToPostFix(context)] - while self.next_token(False): + while self.parser_input.next_token(False): for infix_to_postfix in res: infix_to_postfix.reset() - token = self.get_token() + token = self.parser_input.token try: if token.type in (TokenKind.LPAR, TokenKind.RPAR): # little optim, no need to lock, unlock or get the concept when parenthesis for infix_to_postfix in res: - infix_to_postfix.eat_token(token, self.pos) + infix_to_postfix.eat_token(token, self.parser_input.pos) continue for infix_to_postfix in res: - if infix_to_postfix.eat_token(token, self.pos): + if infix_to_postfix.eat_token(token, self.parser_input.pos): infix_to_postfix.lock() concepts = self.get_concepts(token, self._is_eligible, to_map=self._get_sya_concept_def) if not concepts: for infix_to_postfix in res: - infix_to_postfix.eat_unrecognized(token, self.pos) + infix_to_postfix.eat_unrecognized(token, self.parser_input.pos) continue if len(concepts) == 1: for infix_to_postfix in res: - infix_to_postfix.eat_concept(concepts[0], token, self.pos) + infix_to_postfix.eat_concept(concepts[0], token, self.parser_input.pos) continue # make the cartesian product @@ -1012,7 +1003,7 @@ class SyaNodeParser(BaseNodeParser): for concept in concepts: clone = infix_to_postfix.clone() temp_res.append(clone) - clone.eat_concept(concept, token, self.pos) + clone.eat_concept(concept, token, self.parser_input.pos) res = temp_res finally: @@ -1036,14 +1027,15 @@ class SyaNodeParser(BaseNodeParser): while len(item.nodes) > 0: res = self.postfix_to_item(sheerka, item.nodes) if isinstance(res, PostFixToItem): - items.append(ConceptNode(res.concept, res.start, res.end, self.tokens[res.start: res.end + 1])) + items.append( + ConceptNode(res.concept, res.start, res.end, self.parser_input.tokens[res.start: res.end + 1])) else: items.append(res) item.has_unrecognized |= hasattr(res, "has_unrecognized") and res.has_unrecognized or \ isinstance(res, UnrecognizedTokensNode) item.nodes = items item.fix_all_pos() - item.tokens = self.tokens[item.start:item.end + 1] + item.tokens = self.parser_input.tokens[item.start:item.end + 1] item.fix_source(True) return item @@ -1069,14 +1061,14 @@ class SyaNodeParser(BaseNodeParser): return PostFixToItem(concept, start, end, has_unrecognized) - def parse(self, context, parser_input): + def parse(self, context, parser_input: ParserInput): """ :param context: :param parser_input: :return: """ - if parser_input == "": + if parser_input.is_empty(): return context.sheerka.ret( self.name, False, @@ -1096,7 +1088,7 @@ class SyaNodeParser(BaseNodeParser): return self.sheerka.ret( self.name, False, - context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input)) + context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.as_text())) for infix_to_postfix in valid_infix_to_postfixs: sequence = [] @@ -1106,7 +1098,10 @@ class SyaNodeParser(BaseNodeParser): has_unrecognized |= hasattr(item, "has_unrecognized") and item.has_unrecognized or \ isinstance(item, UnrecognizedTokensNode) if isinstance(item, PostFixToItem): - to_insert = ConceptNode(item.concept, item.start, item.end, self.tokens[item.start: item.end + 1]) + to_insert = ConceptNode(item.concept, + item.start, + item.end, + self.parser_input.tokens[item.start: item.end + 1]) else: to_insert = item sequence.insert(0, to_insert) diff --git a/src/parsers/UnrecognizedNodeParser.py b/src/parsers/UnrecognizedNodeParser.py index cb89979..0d1f397 100644 --- a/src/parsers/UnrecognizedNodeParser.py +++ b/src/parsers/UnrecognizedNodeParser.py @@ -109,7 +109,7 @@ class UnrecognizedNodeParser(BaseParser): _validate_concept(value) elif isinstance(value, UnrecognizedTokensNode): - res = parse_unrecognized(context, value.tokens, PARSERS) + res = parse_unrecognized(context, value.source, PARSERS) res = only_successful(context, res) # only key successful parsers if res.status: concept.compiled[name] = res.body.body diff --git a/src/sheerkapickle/SheerkaPickler.py b/src/sheerkapickle/SheerkaPickler.py index 33e04f3..3bcec93 100644 --- a/src/sheerkapickle/SheerkaPickler.py +++ b/src/sheerkapickle/SheerkaPickler.py @@ -3,6 +3,7 @@ from logging import Logger import core.utils from core.concept import Concept +from core.sheerka.services.SheerkaExecute import ParserInput from sheerkapickle import utils, tags, handlers @@ -37,6 +38,7 @@ class SheerkaPickler: from parsers.BaseParser import BaseParser from evaluators.BaseEvaluator import BaseEvaluator self.to_reduce.append(ToReduce(lambda o: isinstance(o, (BaseParser, BaseEvaluator)), lambda o: o.name)) + self.to_reduce.append(ToReduce(lambda o: isinstance(o, ParserInput), lambda o: o.as_text())) def flatten(self, obj): if utils.is_primitive(obj): diff --git a/src/sheerkapickle/sheerka_handlers.py b/src/sheerkapickle/sheerka_handlers.py index 188c0e2..a3f41c6 100644 --- a/src/sheerkapickle/sheerka_handlers.py +++ b/src/sheerkapickle/sheerka_handlers.py @@ -1,5 +1,6 @@ from core.builtin_concepts import UserInputConcept, ReturnValueConcept, BuiltinConcepts from core.sheerka.Sheerka import Sheerka +from core.sheerka.services.SheerkaExecute import ParserInput from evaluators.BaseEvaluator import BaseEvaluator from parsers.BaseParser import BaseParser from sheerkapickle.handlers import BaseHandler, registry @@ -79,7 +80,9 @@ class UserInputHandler(ConceptHandler): def flatten(self, obj: UserInputConcept, data): data[CONCEPT_ID] = (obj.key, obj.id) data["user_name"] = obj.user_name - data["text"] = BaseParser.get_text_from_tokens(obj.text) if isinstance(obj.text, list) else obj.text + data["text"] = BaseParser.get_text_from_tokens(obj.text) if isinstance(obj.text, list) else \ + obj.text.as_text() if isinstance(obj.text, ParserInput) else \ + obj.text return data def new(self, data): diff --git a/tests/BaseTest.py b/tests/BaseTest.py index 0de2842..ec5a473 100644 --- a/tests/BaseTest.py +++ b/tests/BaseTest.py @@ -24,10 +24,10 @@ class BaseTest: def get_default_concept(self): concept = Concept( name="a + b", - where="isinstance(a, int) and isinstance(b, int)", - pre="isinstance(a, int) and isinstance(b, int)", - post="isinstance(res, int)", - body="def func(x,y):\n return x+y\nfunc(a,b)", + where="isinstance(a, int) and isinstance(b, int)\n", + pre="isinstance(a, int) and isinstance(b, int)\n", + post="isinstance(res, int)\n", + body="def func(x,y):\n return x+y\nfunc(a,b)\n", desc="specific description") concept.def_var("a", "value1") concept.def_var("b", "value2") diff --git a/tests/core/test_ParserInput.py b/tests/core/test_ParserInput.py new file mode 100644 index 0000000..81cdd5d --- /dev/null +++ b/tests/core/test_ParserInput.py @@ -0,0 +1,79 @@ +import pytest +from core.sheerka.services.SheerkaExecute import ParserInput +from core.tokenizer import Tokenizer + + +@pytest.mark.parametrize("text, start, end, expected", [ + ("def concept a", None, None, "def concept a"), + ("&é#(-è_çà)='string'", None, None, "&é#(-è_çà)='string'"), + ("def concept a", 2, None, "concept a"), + ("def concept a", 0, 2, "def concept"), +]) +def test_i_can_use_parser_input(text, start, end, expected): + parser_input = ParserInput(text, start=start, end=end).reset() + assert parser_input.as_text() == expected + + +def test_i_can_get_the_next_token(): + parser_input = ParserInput("def concept a from bnf 'xyz' as 'xyz'").reset() + res = [] + while parser_input.next_token(): + res.append(f"{parser_input.token.str_value}") + + assert res == ['def', 'concept', 'a', 'from', 'bnf', "'xyz'", 'as', "'xyz'", ''] + + parser_input = ParserInput("def concept a concept name from bnf 'xyz' as 'xyz'", start=4, end=9).reset() + res = [] + while parser_input.next_token(skip_whitespace=False): + res.append(f"{parser_input.token.str_value}") + + assert res == ['a', ' ', 'concept', ' ', 'name', ' '] + + +def test_i_can_get_the_next_token_when_initialised_with_tokens(): + tokens = list(Tokenizer(" def concept a as 'xyz' ")) + parser_input = ParserInput(" def concept a as 'xyz' ", tokens).reset() + res = [] + while parser_input.next_token(): + res.append(f"{parser_input.token.str_value}") + + assert res == ['def', 'concept', 'a', 'as', "'xyz'", ''] + + tokens = list(Tokenizer(" def concept a as 'xyz' ", yield_eof=False)) + parser_input = ParserInput(" def concept a as 'xyz' ", tokens).reset() + res = [] + while parser_input.next_token(): + res.append(f"{parser_input.token.str_value}") + + assert res == ['def', 'concept', 'a', 'as', "'xyz'"] + + +def test_i_can_parse_twice(): + text = """ + def concept a + b + where isinstance(a, int) and isinstance(b, int) + pre isinstance(a, int) and isinstance(b, int) + post isinstance(res, int) + as: + def func(x,y): + return x+y + func(a,b) + """ + + p1 = ParserInput(text).reset() + while p1.next_token(): + pass + + p1.reset() + p2 = ParserInput(text).reset() + + while p1.next_token(): + p2.next_token() + assert p1.token == p2.token + + p1.reset() + p2 = ParserInput(text).reset() + + while p2.next_token(): + p1.next_token() + assert p1.token == p2.token diff --git a/tests/core/test_sheerka_call_parsers.py b/tests/core/test_sheerka_call_parsers.py index 71ef47d..dfae9b3 100644 --- a/tests/core/test_sheerka_call_parsers.py +++ b/tests/core/test_sheerka_call_parsers.py @@ -1,4 +1,5 @@ from core.builtin_concepts import ReturnValueConcept, UserInputConcept, BuiltinConcepts, ParserResultConcept +from core.sheerka.services.SheerkaExecute import ParserInput from parsers.BaseParser import BaseParser from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka @@ -22,7 +23,7 @@ class BaseTestParser(BaseParser): @staticmethod def _get_source(text_): - return text_ if isinstance(text_, str) else text_.body + return text_.as_text() if isinstance(text_, ParserInput) else text_.body def _out(self, name, priority, status, source): debug = f"name={name}" @@ -33,7 +34,7 @@ class BaseTestParser(BaseParser): def parse(self, context, text): self._out(self._get_name(self.name), self.priority, self.status, self._get_source(text)) - value = self._get_name(self.name) + ":" + (text if isinstance(text, str) else text.body) + value = self._get_name(self.name) + ":" + self._get_source(text) parser_result = ParserResultConcept(parser=self, value=value) return ReturnValueConcept(self, self.status, self.parser_result or parser_result) @@ -54,8 +55,8 @@ class Enabled80MultipleFalseParser(BaseTestParser): def parse(self, context, text): self._out(self._get_name(self.name), self.priority, self.status, self._get_source(text)) - value1 = self._get_name(self.name) + ":" + (text if isinstance(text, str) else text.body) + "_1" - value2 = self._get_name(self.name) + ":" + (text if isinstance(text, str) else text.body) + "_2" + value1 = self._get_name(self.name) + ":" + self._get_source(text) + "_1" + value2 = self._get_name(self.name) + ":" + self._get_source(text) + "_2" return [ ReturnValueConcept(self, self.status, ParserResultConcept(parser=self, value=value1)), ReturnValueConcept(self, self.status, ParserResultConcept(parser=self, value=value2)), @@ -68,8 +69,8 @@ class Enabled80MultipleTrueParser(BaseTestParser): def parse(self, context, text): self._out(self._get_name(self.name), self.priority, self.status, self._get_source(text)) - value1 = self._get_name(self.name) + ":" + (text if isinstance(text, str) else text.body) + "_1" - value2 = self._get_name(self.name) + ":" + (text if isinstance(text, str) else text.body) + "_2" + value1 = self._get_name(self.name) + ":" + self._get_source(text) + "_1" + value2 = self._get_name(self.name) + ":" + self._get_source(text) + "_2" return [ ReturnValueConcept(self, True, ParserResultConcept(parser=self, value=value1)), ReturnValueConcept(self, False, ParserResultConcept(parser=self, value=value2)), @@ -90,7 +91,7 @@ class Enabled50TrueParser(BaseTestParser): status = isinstance(text, ParserResultConcept) and source == "Enabled80False:Enabled90False:hello world" self._out(self._get_name(self.name), self.priority, status, source) - value = self._get_name(self.name) + ":" + (text if isinstance(text, str) else text.body) + value = self._get_name(self.name) + ":" + self._get_source(text) return_value = ParserResultConcept(parser=self, value=value) return ReturnValueConcept(self, status, return_value) diff --git a/tests/evaluators/test_LexerNodeEvaluator.py b/tests/evaluators/test_LexerNodeEvaluator.py index 3e4ed7a..7850970 100644 --- a/tests/evaluators/test_LexerNodeEvaluator.py +++ b/tests/evaluators/test_LexerNodeEvaluator.py @@ -3,6 +3,7 @@ import ast import pytest from core.builtin_concepts import ReturnValueConcept, ParserResultConcept, BuiltinConcepts from core.concept import Concept, ConceptParts, DoNotResolve +from core.sheerka.services.SheerkaExecute import ParserInput from evaluators.LexerNodeEvaluator import LexerNodeEvaluator from parsers.BaseNodeParser import SourceCodeNode from parsers.BnfNodeParser import ConceptNode, BnfNodeParser, UnrecognizedTokensNode @@ -17,7 +18,7 @@ class TestLexerNodeEvaluator(TestUsingMemoryBasedSheerka): parser = BnfNodeParser() parser.init_from_concepts(context, concepts) - ret_val = parser.parse(context, expression) + ret_val = parser.parse(context, ParserInput(expression)) assert ret_val.status return ret_val diff --git a/tests/evaluators/test_PythonEvaluator.py b/tests/evaluators/test_PythonEvaluator.py index ba5c6b0..f4be56d 100644 --- a/tests/evaluators/test_PythonEvaluator.py +++ b/tests/evaluators/test_PythonEvaluator.py @@ -2,6 +2,7 @@ import pytest from core.builtin_concepts import ReturnValueConcept, ParserResultConcept, BuiltinConcepts from core.concept import Concept, DEFINITION_TYPE_DEF +from core.sheerka.services.SheerkaExecute import ParserInput from evaluators.PythonEvaluator import PythonEvaluator from parsers.PythonParser import PythonNode, PythonParser from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka @@ -31,7 +32,7 @@ class TestPythonEvaluator(TestUsingMemoryBasedSheerka): ]) def test_i_can_eval(self, text, expected): context = self.get_context() - parsed = PythonParser().parse(context, text) + parsed = PythonParser().parse(context, ParserInput(text)) evaluated = PythonEvaluator().eval(context, parsed) @@ -40,7 +41,7 @@ class TestPythonEvaluator(TestUsingMemoryBasedSheerka): def test_i_can_eval_using_context(self): context = self.get_context() - parsed = PythonParser().parse(context, "test_using_context('value for param1', 10)") + parsed = PythonParser().parse(context, ParserInput("test_using_context('value for param1', 10)")) evaluated = PythonEvaluator().eval(context, parsed) @@ -49,7 +50,7 @@ class TestPythonEvaluator(TestUsingMemoryBasedSheerka): def test_i_can_eval_using_context_when_self_is_not_sheerka(self): sheerka, context = self.init_concepts() - parsed = PythonParser().parse(context, "create_new_concept(Concept('foo'))") + parsed = PythonParser().parse(context, ParserInput("create_new_concept(Concept('foo'))")) evaluated = PythonEvaluator().eval(context, parsed) @@ -66,7 +67,7 @@ class TestPythonEvaluator(TestUsingMemoryBasedSheerka): context = self.get_context() context.sheerka.add_in_cache(Concept("foo")) - parsed = PythonParser().parse(context, "foo") + parsed = PythonParser().parse(context, ParserInput("foo")) evaluated = PythonEvaluator().eval(context, parsed) assert not evaluated.status @@ -80,7 +81,7 @@ class TestPythonEvaluator(TestUsingMemoryBasedSheerka): context = self.get_context() context.sheerka.add_in_cache(Concept("foo", body="1")) - parsed = PythonParser().parse(context, "foo + 2") + parsed = PythonParser().parse(context, ParserInput("foo + 2")) evaluated = PythonEvaluator().eval(context, parsed) assert evaluated.status @@ -94,7 +95,7 @@ class TestPythonEvaluator(TestUsingMemoryBasedSheerka): context = self.get_context() context.sheerka.add_in_cache(Concept("foo")) - parsed = PythonParser().parse(context, "def a(b):\n return b\na(c:foo:)") + parsed = PythonParser().parse(context, ParserInput("def a(b):\n return b\na(c:foo:)")) evaluated = PythonEvaluator().eval(context, parsed) assert evaluated.status @@ -108,7 +109,7 @@ class TestPythonEvaluator(TestUsingMemoryBasedSheerka): context = self.get_context() context.sheerka.add_in_cache(Concept("foo", body="2")) - parsed = PythonParser().parse(context, "def a(b):\n return b\na(foo)") + parsed = PythonParser().parse(context, ParserInput("def a(b):\n return b\na(foo)")) evaluated = PythonEvaluator().eval(context, parsed) assert evaluated.status @@ -118,7 +119,7 @@ class TestPythonEvaluator(TestUsingMemoryBasedSheerka): context = self.get_context() context.sheerka.add_in_cache(Concept("foo", body="2")) - parsed = PythonParser().parse(context, "get_concept_name(c:foo:)") + parsed = PythonParser().parse(context, ParserInput("get_concept_name(c:foo:)")) python_evaluator = PythonEvaluator() python_evaluator.locals["get_concept_name"] = get_concept_name evaluated = python_evaluator.eval(context, parsed) @@ -127,7 +128,7 @@ class TestPythonEvaluator(TestUsingMemoryBasedSheerka): assert evaluated.value == "foo" # sanity, does not work otherwise - parsed = PythonParser().parse(context, "get_concept_name(foo)") + parsed = PythonParser().parse(context, ParserInput("get_concept_name(foo)")) python_evaluator = PythonEvaluator() python_evaluator.locals["get_concept_name"] = get_concept_name evaluated = python_evaluator.eval(context, parsed) @@ -141,7 +142,7 @@ class TestPythonEvaluator(TestUsingMemoryBasedSheerka): self.from_def_concept("mult", "a mult b", ["a", "b"]), ) - parsed = PythonParser().parse(context, "is_greater_than(BuiltinConcepts.PRECEDENCE, mult, plus)") + parsed = PythonParser().parse(context, ParserInput("is_greater_than(BuiltinConcepts.PRECEDENCE, mult, plus)")) python_evaluator = PythonEvaluator() evaluated = python_evaluator.eval(context, parsed) diff --git a/tests/non_reg/test_sheerka_non_reg.py b/tests/non_reg/test_sheerka_non_reg.py index 451923b..6a16e25 100644 --- a/tests/non_reg/test_sheerka_non_reg.py +++ b/tests/non_reg/test_sheerka_non_reg.py @@ -100,7 +100,7 @@ as: def func(x,y): return x+y func(a,b) - """ +""" expected = self.get_default_concept() expected.metadata.id = "1001" @@ -167,7 +167,7 @@ as: def func(x,y): return x+y func(a,b) - """ +""" sheerka = self.get_sheerka() sheerka.evaluate_user_input(text) diff --git a/tests/parsers/test_AtomsParser.py b/tests/parsers/test_AtomsParser.py index 4d95668..1c91284 100644 --- a/tests/parsers/test_AtomsParser.py +++ b/tests/parsers/test_AtomsParser.py @@ -1,6 +1,7 @@ import pytest from core.builtin_concepts import BuiltinConcepts from core.concept import Concept, DEFINITION_TYPE_DEF +from core.sheerka.services.SheerkaExecute import ParserInput from parsers.AtomNodeParser import AtomNodeParser from parsers.BaseNodeParser import cnode, utnode, CNC, SCN, CN @@ -26,7 +27,7 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka): def test_i_cannot_parse_empty_string(self): sheerka, context, parser = self.init_parser({}) - res = parser.parse(context, "") + res = parser.parse(context, ParserInput("")) assert not res.status assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY) @@ -49,7 +50,7 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka): } sheerka, context, parser = self.init_parser(concepts_map, create_new=True, use_sheerka=True) - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) wrapper = res.body lexer_nodes = res.body.body @@ -72,7 +73,7 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka): } sheerka, context, parser = self.init_parser(concepts_map) - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) wrapper = res.body lexer_nodes = res.body.body @@ -110,7 +111,7 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka): } sheerka, context, parser = self.init_parser(concepts_map) - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) wrapper = res.body lexer_nodes = res.body.body @@ -137,7 +138,7 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka): } sheerka, context, parser = self.init_parser(concepts_map) - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) wrapper = res.body lexer_nodes = res.body.body @@ -158,7 +159,7 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka): } sheerka, context, parser = self.init_parser(concepts_map) - list_of_res = parser.parse(context, text) + list_of_res = parser.parse(context, ParserInput(text)) assert len(list_of_res) == len(expected) for i, res in enumerate(list_of_res): @@ -180,7 +181,7 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka): text = "one two x$!# one two" sheerka, context, parser = self.init_parser(concepts_map) - list_of_res = parser.parse(context, text) + list_of_res = parser.parse(context, ParserInput(text)) expected = [ (False, ["one", "two", " x$!# ", ("one", 1), ("two", 1)]), @@ -210,7 +211,7 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka): sheerka, context, parser = self.init_parser(concepts_map) sheerka.set_isa(context, sheerka.new("one"), sheerka.new("number")) - res = parser.parse(context, "one") + res = parser.parse(context, ParserInput("one")) lexer_nodes = res.body.body expected_array = compute_expected_array(concepts_map, "one", ["one"]) assert lexer_nodes == expected_array @@ -223,16 +224,36 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka): sheerka, context, parser = self.init_parser(concepts_map) - res = parser.parse(context, "a special concept") + res = parser.parse(context, ParserInput("a special concept")) lexer_nodes = res.body.body expected_array = compute_expected_array(concepts_map, "a special concept", ["a special concept"]) assert lexer_nodes == expected_array - res = parser.parse(context, "isa") + res = parser.parse(context, ParserInput("isa")) lexer_nodes = res.body.body expected_array = compute_expected_array(concepts_map, "isa", ["isa"]) assert lexer_nodes == expected_array + def test_i_can_parse_concepts_when_sub_tokens(self): + concepts_map = { + "foo": Concept("foo"), + "bar": Concept("bar"), + } + + sheerka, context, parser = self.init_parser(concepts_map, create_new=True, use_sheerka=True) + text = "not recognized foo bar not not recognized" + expected = ["foo", "bar"] + parser_input = ParserInput(text, start=3, end=7) + res = parser.parse(context, parser_input) + wrapper = res.body + lexer_nodes = res.body.body + + assert res.status + + expected_array = compute_expected_array(concepts_map, text, expected) + assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert lexer_nodes == expected_array + @pytest.mark.parametrize("text", [ "foo", f"foo one", @@ -249,7 +270,7 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka): } sheerka, context, parser = self.init_parser(concepts_map) - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) assert not res.status assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) @@ -270,7 +291,7 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka): } sheerka, context, parser = self.init_parser(concepts_map, create_new=True, singleton=False) - list_of_res = parser.parse(context, text) + list_of_res = parser.parse(context, ParserInput(text)) assert len(list_of_res) == len(expected) @@ -293,7 +314,7 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka): } sheerka, context, parser = self.init_parser(concepts_map) - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) wrapper = res.body lexer_nodes = res.body.body @@ -321,7 +342,7 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka): } sheerka, context, parser = self.init_parser(concepts_map, create_new=True, use_sheerka=True) - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) lexer_nodes = res.body.body assert res.status diff --git a/tests/parsers/test_BaseParser.py b/tests/parsers/test_BaseParser.py index 1c21056..e50106e 100644 --- a/tests/parsers/test_BaseParser.py +++ b/tests/parsers/test_BaseParser.py @@ -81,3 +81,4 @@ def test_i_can_test_split_iter_parser_indexes(): ]) def test_i_can_get_tokens_boundaries(tokens, expected): assert BaseParser.get_tokens_boundaries(tokens) == expected + diff --git a/tests/parsers/test_BnfNodeParser.py b/tests/parsers/test_BnfNodeParser.py index 0352673..c2a5401 100644 --- a/tests/parsers/test_BnfNodeParser.py +++ b/tests/parsers/test_BnfNodeParser.py @@ -1,6 +1,7 @@ import pytest from core.builtin_concepts import BuiltinConcepts from core.concept import Concept, ConceptParts, DoNotResolve, DEFINITION_TYPE_DEF, CC +from core.sheerka.services.SheerkaExecute import ParserInput from parsers.BaseNodeParser import CNC, UTN, CN from parsers.BnfNodeParser import BnfNodeParser, StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \ Optional, ZeroOrMore, OneOrMore, ConceptExpression @@ -95,7 +96,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): parser = BnfNodeParser() parser.init_from_concepts(context, updated) - parser.reset_parser(context, text) + parser.reset_parser(context, ParserInput(text)) bnf_parsers_helpers = parser.get_concepts_sequences() @@ -113,34 +114,11 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): my_map, text, expected, multiple_result, post_init_concepts ) return sequences - # sheerka, context, *updated = self.init_concepts(*my_map.values(), create_new=False, singleton=True) - # if not multiple_result: - # expected_array = [compute_expected_array(my_map, text, expected)] - # else: - # expected_array = [compute_expected_array(my_map, text, e) for e in expected] - # - # if post_init_concepts: - # post_init_concepts(sheerka, context) - # - # parser = BnfNodeParser() - # parser.init_from_concepts(context, updated) - # parser.reset_parser(context, text) - # - # bnf_parsers_helpers = parser.get_concepts_sequences() - # - # assert len(bnf_parsers_helpers) == len(expected_array) - # for parser_helper, expected_sequence in zip(bnf_parsers_helpers, expected_array): - # assert parser_helper.sequence == expected_sequence - # - # if len(bnf_parsers_helpers) == 1: - # return bnf_parsers_helpers[0].sequence - # else: - # return [pe.sequence for pe in bnf_parsers_helpers] def test_i_cannot_parse_empty_strings(self): sheerka, context, parser = self.init_parser({}, singleton=True) - res = parser.parse(context, "") + res = parser.parse(context, ParserInput("")) assert not res.status assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) @@ -738,7 +716,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): sheerka, context, parser = self.init_parser(my_map, singleton=True) parser.init_from_concepts(context, my_map.values()) - parser.reset_parser(context, "one three") + parser.reset_parser(context, ParserInput("one three")) sequences = parser.get_concepts_sequences() sequence = parser.get_valid(sequences) @@ -756,7 +734,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): def test_i_can_parse(self, parser_input, expected_status, expected): sheerka, context, parser = self.init_parser(init_from_sheerka=True) - res = parser.parse(context, parser_input) + res = parser.parse(context, ParserInput(parser_input)) expected_array = compute_expected_array(cmap, parser_input, expected) parser_result = res.value concepts_nodes = res.value.value @@ -771,7 +749,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): parser_input = "def one" expected = [CNC("def number", source="def one", number="one", one="one")] - res = parser.parse(context, parser_input) + res = parser.parse(context, ParserInput(parser_input)) expected_array = compute_expected_array(cmap, parser_input, expected) expected_array[0].compiled["def"] = cmap["def_only"] diff --git a/tests/parsers/test_BnfParser.py b/tests/parsers/test_BnfParser.py index 4e9a98b..da2be80 100644 --- a/tests/parsers/test_BnfParser.py +++ b/tests/parsers/test_BnfParser.py @@ -1,6 +1,7 @@ import pytest from core.builtin_concepts import BuiltinConcepts from core.concept import Concept, DEFINITION_TYPE_BNF +from core.sheerka.services.SheerkaExecute import ParserInput from core.tokenizer import Tokenizer, TokenKind, LexerError, Token from parsers.BaseNodeParser import cnode from parsers.BaseParser import UnexpectedTokenErrorNode @@ -135,7 +136,6 @@ class TestBnfParser(TestUsingMemoryBasedSheerka): ("1|", UnexpectedEndOfFileError()), ("(1|)", UnexpectedTokenErrorNode("Unexpected token 'Token()'", eof_token, [TokenKind.RPAR])), ("1=", UnexpectedTokenErrorNode("Unexpected token 'Token()'", eof_token, [TokenKind.IDENTIFIER])), - ("'name", LexerError("Missing Trailing quote", "'name", 5, 1, 6)) ]) def test_i_can_detect_errors(self, expression, error): sheerka, context, parser = self.init_parser() @@ -146,6 +146,17 @@ class TestBnfParser(TestUsingMemoryBasedSheerka): assert not res.status assert ret_value[0] == error + def test_i_can_detect_lexer_error(self): + sheerka, context, parser = self.init_parser() + + res = parser.parse(context, Tokenizer("'name")) + + assert not res.status + assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR) + assert isinstance(res.body.body[0], LexerError) + assert res.body.body[0].message == "Missing Trailing quote" + assert res.body.body[0].text == "'name" + def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text(self): sheerka, context, regex_parser, foo, bar = self.init_parser("foo", "bar") @@ -158,15 +169,15 @@ class TestBnfParser(TestUsingMemoryBasedSheerka): bnf_parser = BnfNodeParser() bnf_parser.init_from_concepts(context, [foo, bar]) - res = bnf_parser.parse(context, "twenty two") + res = bnf_parser.parse(context, ParserInput("twenty two")) assert res.status assert res.value.body == [cnode("bar", 0, 2, "twenty two")] - res = bnf_parser.parse(context, "thirty one") + res = bnf_parser.parse(context, ParserInput("thirty one")) assert res.status assert res.value.body == [cnode("bar", 0, 2, "thirty one")] - res = bnf_parser.parse(context, "twenty") + res = bnf_parser.parse(context, ParserInput("twenty")) assert res.status assert res.value.body == [cnode("foo", 0, 0, "twenty")] diff --git a/tests/parsers/test_DefaultParser.py b/tests/parsers/test_DefaultParser.py index dc2f5c5..b01d8f0 100644 --- a/tests/parsers/test_DefaultParser.py +++ b/tests/parsers/test_DefaultParser.py @@ -1,8 +1,10 @@ import ast +from dataclasses import dataclass import pytest from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnValueConcept from core.concept import DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF +from core.sheerka.services.SheerkaExecute import ParserInput from core.tokenizer import Keywords, Tokenizer, LexerError from parsers.BnfNodeParser import OrderedChoice, ConceptExpression, StrMatch from parsers.BnfParser import BnfParser @@ -39,7 +41,7 @@ def get_def_concept(name, where=None, pre=None, post=None, body=None, definition def get_concept_part(part): if isinstance(part, str): - node = PythonNode(part, ast.parse(part, mode="eval")) + node = PythonNode(part.strip(), ast.parse(part.strip(), mode="eval")) return ReturnValueConcept( who="parsers.Default", status=True, @@ -48,6 +50,16 @@ def get_concept_part(part): parser=PythonParser(), value=node)) + if isinstance(part, PN): + node = PythonNode(part.source.strip(), ast.parse(part.source.strip(), mode=part.mode)) + return ReturnValueConcept( + who="parsers.Default", + status=True, + value=ParserResultConcept( + source=part.source, + parser=PythonParser(), + value=node)) + if isinstance(part, PythonNode): return ReturnValueConcept( who="parsers.Default", @@ -61,6 +73,12 @@ def get_concept_part(part): return part +@dataclass +class PN: + source: str # parser result source + mode: str # compilation mode + + class TestDefaultParser(TestUsingMemoryBasedSheerka): def init_parser(self, *concepts): @@ -82,7 +100,7 @@ class TestDefaultParser(TestUsingMemoryBasedSheerka): ]) def test_i_can_parse_def_concept(self, text, expected): sheerka, context, parser = self.init_parser() - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) node = res.value.value assert res.status @@ -99,14 +117,14 @@ post isinstance(res, int) as res = a + b """ sheerka, context, parser = self.init_parser() - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) return_value = res.value expected_concept = get_def_concept( name="a plus b", - where="a,b", - pre="isinstance(a, int) and isinstance(b, float)", - post="isinstance(res, int)", - body=PythonNode("res = a + b", ast.parse("res = a + b", mode="exec")) + where="a,b\n", + pre="isinstance(a, int) and isinstance(b, float)\n", + post="isinstance(res, int)\n", + body=PN("res = a + b\n ", "exec") ) assert res.status @@ -119,17 +137,15 @@ def concept add one to a as def func(x): return x+1 func(a) - """ +""" expected_concept = get_def_concept( name="add one to a ", - body=PythonNode( - "def func(x):\n return x+1\nfunc(a)", - ast.parse("def func(x):\n return x+1\nfunc(a)", mode="exec")) + body=PN("def func(x):\n return x+1\nfunc(a)\n", "exec") ) sheerka, context, parser = self.init_parser() - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) return_value = res.value assert res.status @@ -141,8 +157,7 @@ func(a) def concept add one to a as: def func(x): return x+1 - func(a) - """ + func(a)""" expected_concept = get_def_concept( name="add one to a ", @@ -152,7 +167,7 @@ def concept add one to a as: ) sheerka, context, parser = self.init_parser() - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) return_value = res.value assert res.status @@ -168,7 +183,7 @@ func(a) """ sheerka, context, parser = self.init_parser() - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) return_value = res.value assert not res.status @@ -184,7 +199,7 @@ def concept add one to a as func(a) """ sheerka, context, parser = self.init_parser() - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) return_value = res.value assert not res.status @@ -194,7 +209,7 @@ def concept add one to a as text = "def concept as 'hello'" sheerka, context, parser = self.init_parser() - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) return_value = res.value assert not res.status @@ -205,9 +220,9 @@ def concept add one to a as def test_concept_keyword_is_mandatory_but_the_concept_is_recognized(self): text = "def hello as a where b pre c post d" - expected_concept = get_def_concept(name="hello", body="a", where="b", pre="c", post="d") + expected_concept = get_def_concept(name="hello", body="a ", where="b ", pre="c ", post="d") sheerka, context, parser = self.init_parser() - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) return_value = res.value assert not res.status @@ -225,7 +240,7 @@ def concept add one to a as ]) def test_i_can_detect_error_in_declaration(self, text): sheerka, context, parser = self.init_parser() - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) return_value = res.value assert not res.status @@ -235,7 +250,7 @@ def concept add one to a as text = "def concept hello \n my friend as 'hello'" sheerka, context, parser = self.init_parser() - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) return_value = res.value assert not res.status @@ -244,7 +259,7 @@ def concept add one to a as def test_i_can_parse_def_concept_from_bnf(self): text = "def concept name from bnf a_concept | 'a_string' as __definition[0]" sheerka, context, parser, a_concept = self.init_parser("a_concept") - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) node = res.value.value definition = OrderedChoice(ConceptExpression(a_concept, rule_name="a_concept"), StrMatch("a_string")) @@ -260,7 +275,7 @@ def concept add one to a as def test_i_can_parse_def_concept_where_bnf_references_itself(self): text = "def concept name from bnf 'a' + name?" sheerka, context, parser, a_concept = self.init_parser("a_concept") - parser.parse(context, text) + parser.parse(context, ParserInput(text)) assert not parser.has_error @@ -271,7 +286,7 @@ def concept add one to a as ]) def test_i_can_detect_empty_bnf_declaration(self, text): sheerka, context, parser = self.init_parser() - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) assert not res.status assert res.value.value[0] == SyntaxErrorNode([], "Empty declaration") @@ -281,7 +296,7 @@ def concept add one to a as "def concept addition from def a plus b as a + b"]) def test_i_can_def_concept_from_definition(self, text): sheerka, context, parser = self.init_parser() - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) expected = get_def_concept("addition", definition="a plus b", body="a + b") node = res.value.value @@ -294,7 +309,7 @@ def concept add one to a as def test_i_can_detect_not_for_me(self): text = "hello world" sheerka, context, parser = self.init_parser() - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) assert not res.status assert context.sheerka.isinstance(res.value, BuiltinConcepts.NOT_FOR_ME) @@ -303,7 +318,7 @@ def concept add one to a as def test_i_can_parse_is_a(self): text = "the name of my 'concept' isa the name of the set" sheerka, context, parser = self.init_parser() - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) expected = IsaConceptNode([], concept=NameNode(list(Tokenizer("the name of my 'concept'"))), set=NameNode(list(Tokenizer("the name of the set")))) @@ -323,7 +338,7 @@ def concept add one to a as ]) def test_i_cannot_parse_invalid_entries(self, text): sheerka, context, parser = self.init_parser() - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) assert not res.status assert isinstance(res.body, ParserResultConcept) @@ -339,10 +354,10 @@ def concept add one to a as ]) def test_i_cannot_parse_when_tokenizer_fails(self, text, error_msg, error_text): sheerka, context, parser = self.init_parser() - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) assert not res.status - assert isinstance(res.body, ParserResultConcept) + assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR) assert isinstance(res.body.body[0], LexerError) assert res.body.body[0].message == error_msg assert res.body.body[0].text == error_text @@ -350,7 +365,7 @@ def concept add one to a as def test_i_cannot_parse_bnf_definition_referencing_unknown_concept(self): text = "def concept name from bnf unknown" sheerka, context, parser = self.init_parser() - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) assert not res.status assert context.sheerka.isinstance(res.value, BuiltinConcepts.UNKNOWN_CONCEPT) diff --git a/tests/parsers/test_ExactConceptParser.py b/tests/parsers/test_ExactConceptParser.py index e3a0cdf..d4f1b82 100644 --- a/tests/parsers/test_ExactConceptParser.py +++ b/tests/parsers/test_ExactConceptParser.py @@ -1,5 +1,6 @@ from core.builtin_concepts import BuiltinConcepts from core.concept import Concept, CMV +from core.sheerka.services.SheerkaExecute import ParserInput from core.tokenizer import Tokenizer from parsers.ExactConceptParser import ExactConceptParser @@ -55,7 +56,7 @@ class TestExactConceptParser(TestUsingMemoryBasedSheerka): concept = self.create_and_add_in_cache_concept(sheerka, "hello world") source = "hello world" - results = ExactConceptParser().parse(context, source) + results = ExactConceptParser().parse(context, ParserInput(source)) concept_found = results[0].value.value assert len(results) == 1 @@ -71,7 +72,7 @@ class TestExactConceptParser(TestUsingMemoryBasedSheerka): self.create_and_add_in_cache_concept(sheerka, "hello a", variables=["a"]) source = "hello world" - results = ExactConceptParser().parse(context, source) + results = ExactConceptParser().parse(context, ParserInput(source)) assert len(results) == 2 results = sorted(results, key=lambda x: x.value.value.name) # because of the usage of sets @@ -91,7 +92,7 @@ class TestExactConceptParser(TestUsingMemoryBasedSheerka): concept = self.create_and_add_in_cache_concept(sheerka, "a + b", ["a", "b"]) source = "10 + 5" - results = ExactConceptParser().parse(context, source) + results = ExactConceptParser().parse(context, ParserInput(source)) assert len(results) == 1 assert results[0].status @@ -107,7 +108,7 @@ class TestExactConceptParser(TestUsingMemoryBasedSheerka): concept = self.create_and_add_in_cache_concept(sheerka, "a + b + a", ["a", "b"]) source = "10 + 5 + 10" - results = ExactConceptParser(max_word_size=5).parse(context, source) + results = ExactConceptParser(max_word_size=5).parse(context, ParserInput(source)) assert len(results) == 1 assert results[0].status @@ -122,7 +123,7 @@ class TestExactConceptParser(TestUsingMemoryBasedSheerka): ) source = "10 + 5" - results = ExactConceptParser().parse(context, source) + results = ExactConceptParser().parse(context, ParserInput(source)) concept_found = results[0].value.value assert len(results) == 1 @@ -135,7 +136,7 @@ class TestExactConceptParser(TestUsingMemoryBasedSheerka): sheerka, context, foo = self.init_concepts("foo") source = "c:foo:" - results = ExactConceptParser().parse(context, source) + results = ExactConceptParser().parse(context, ParserInput(source)) concept_found = results[0].value.value assert len(results) == 1 @@ -152,7 +153,7 @@ class TestExactConceptParser(TestUsingMemoryBasedSheerka): ) source = "c:one: plus c:two:" - results = ExactConceptParser().parse(context, source) + results = ExactConceptParser().parse(context, ParserInput(source)) concept_found = results[0].value.value assert len(results) == 1 @@ -168,7 +169,7 @@ class TestExactConceptParser(TestUsingMemoryBasedSheerka): ) source = "z is a concept" - results = ExactConceptParser().parse(context, source) + results = ExactConceptParser().parse(context, ParserInput(source)) concept_found = results[0].value.value assert len(results) == 1 @@ -178,7 +179,7 @@ class TestExactConceptParser(TestUsingMemoryBasedSheerka): assert not concept_found.metadata.is_evaluated source = "def concept z" - results = ExactConceptParser().parse(context, source) + results = ExactConceptParser().parse(context, ParserInput(source)) concept_found = results[0].value.value assert len(results) == 1 @@ -190,7 +191,7 @@ class TestExactConceptParser(TestUsingMemoryBasedSheerka): def test_i_can_manage_unknown_concept(self): context = self.get_context(self.get_sheerka(singleton=True)) source = "def concept hello" # this is not a concept by itself - res = ExactConceptParser().parse(context, source) + res = ExactConceptParser().parse(context, ParserInput(source)) assert not res.status assert context.sheerka.isinstance(res.value, BuiltinConcepts.UNKNOWN_CONCEPT) @@ -199,7 +200,7 @@ class TestExactConceptParser(TestUsingMemoryBasedSheerka): def test_i_can_detect_concepts_too_long(self): context = self.get_context(self.get_sheerka(singleton=True)) source = "a very but finally too long concept" - res = ExactConceptParser().parse(context, source) + res = ExactConceptParser().parse(context, ParserInput(source)) assert not res.status assert context.sheerka.isinstance(res.value, BuiltinConcepts.NOT_FOR_ME) @@ -207,15 +208,15 @@ class TestExactConceptParser(TestUsingMemoryBasedSheerka): assert res.value.reason.body == source assert res.value.body == source - def test_i_can_detect_concept_from_tokens(self): - context = self.get_context(self.get_sheerka(singleton=True)) - concept = get_concept("hello world", []) - context.sheerka.add_in_cache(concept) - - source = "hello world" - results = ExactConceptParser().parse(context, list(Tokenizer(source))) - - assert len(results) == 1 - assert results[0].status - assert results[0].value.value == concept - assert not results[0].value.value.metadata.need_validation + # def test_i_can_detect_concept_from_tokens(self): + # context = self.get_context(self.get_sheerka(singleton=True)) + # concept = get_concept("hello world", []) + # context.sheerka.add_in_cache(concept) + # + # source = "hello world" + # results = ExactConceptParser().parse(context, list(Tokenizer(source))) + # + # assert len(results) == 1 + # assert results[0].status + # assert results[0].value.value == concept + # assert not results[0].value.value.metadata.need_validation diff --git a/tests/parsers/test_PythonParser.py b/tests/parsers/test_PythonParser.py index 6afaa5a..ba17227 100644 --- a/tests/parsers/test_PythonParser.py +++ b/tests/parsers/test_PythonParser.py @@ -3,7 +3,8 @@ import ast import core.utils import pytest from core.builtin_concepts import ParserResultConcept, NotForMeConcept -from core.tokenizer import Tokenizer, LexerError +from core.sheerka.services.SheerkaExecute import ParserInput +from core.tokenizer import LexerError from parsers.PythonParser import PythonNode, PythonParser, PythonErrorNode from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka @@ -17,21 +18,7 @@ class TestPythonParser(TestUsingMemoryBasedSheerka): ]) def test_i_can_parse_a_simple_expression(self, text, expected): parser = PythonParser() - res = parser.parse(self.get_context(), text) - - assert res.status - assert res.who == parser.name - assert isinstance(res.value, ParserResultConcept) - assert res.value.value == expected - - @pytest.mark.parametrize("text, expected", [ - ("1+1", PythonNode("1+1", ast.parse("1+1", mode="eval"))), - ("a=10", PythonNode("a=10", ast.parse("a=10", mode="exec"))), - ]) - def test_i_can_parse_from_tokens(self, text, expected): - parser = PythonParser() - tokens = list(Tokenizer(text)) - res = parser.parse(self.get_context(), tokens) + res = parser.parse(self.get_context(), ParserInput(text)) assert res.status assert res.who == parser.name @@ -40,12 +27,11 @@ class TestPythonParser(TestUsingMemoryBasedSheerka): @pytest.mark.parametrize("text", [ "1+", - "'name", - "foo = 'name" + "&#é", ]) - def test_i_can_detect_error(self, text): + def test_i_can_detect_python_error(self, text): parser = PythonParser() - res = parser.parse(self.get_context(), text) + res = parser.parse(self.get_context(), ParserInput(text)) assert not res.status assert res.who == parser.name @@ -61,7 +47,7 @@ class TestPythonParser(TestUsingMemoryBasedSheerka): ]) def test_i_can_detect_lexer_errors(self, text, error_msg, error_text): parser = PythonParser() - res = parser.parse(self.get_context(), text) + res = parser.parse(self.get_context(), ParserInput(text)) assert not res.status assert isinstance(res.value, NotForMeConcept) @@ -76,12 +62,13 @@ class TestPythonParser(TestUsingMemoryBasedSheerka): text = "c:name|id: + 1" parser = PythonParser() - res = parser.parse(self.get_context(), text) + res = parser.parse(self.get_context(), ParserInput(text)) encoded = core.utils.encode_concept(("name", "id")) assert res + assert res.value.source == "c:name|id: + 1" assert res.value.value == PythonNode( - "c:name|id: + 1", + "__C__KEY_name__ID_id__C__ + 1", ast.parse(encoded + "+1", mode="eval")) assert res.value.value.concepts == { encoded: ("name", "id") diff --git a/tests/parsers/test_PythonWithConceptsParser.py b/tests/parsers/test_PythonWithConceptsParser.py index dfcb899..8ea43bf 100644 --- a/tests/parsers/test_PythonWithConceptsParser.py +++ b/tests/parsers/test_PythonWithConceptsParser.py @@ -3,6 +3,7 @@ import ast import pytest from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnValueConcept from core.concept import Concept +from core.sheerka.services.SheerkaExecute import ParserInput from core.tokenizer import Token, TokenKind, Tokenizer from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode from parsers.PythonParser import PythonNode @@ -38,6 +39,7 @@ class TestPythonWithConceptsParser(TestUsingMemoryBasedSheerka): @pytest.mark.parametrize("text, interested", [ ("not parser result", False), + (ParserInput("not parser result"), False), (ParserResultConcept(parser="not multiple_concepts_parser"), False), (ParserResultConcept(parser=unrecognized_nodes_parser, value=[UnrecognizedTokensNode(0, 0, [])]), True), ]) diff --git a/tests/parsers/test_SyaNodeParser.py b/tests/parsers/test_SyaNodeParser.py index ff6b060..dcd8541 100644 --- a/tests/parsers/test_SyaNodeParser.py +++ b/tests/parsers/test_SyaNodeParser.py @@ -2,6 +2,7 @@ import pytest from core.builtin_concepts import BuiltinConcepts from core.concept import Concept, CC from core.sheerka.services.SheerkaComparisonManager import SheerkaComparisonManager +from core.sheerka.services.SheerkaExecute import ParserInput from core.tokenizer import Tokenizer from parsers.BaseNodeParser import utnode, ConceptNode, cnode, short_cnode, UnrecognizedTokensNode, \ SCWC, CNC, UTN @@ -198,7 +199,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): def test_i_can_post_fix_simple_infix_concepts(self, expression, expected_sequences): sheerka, context, parser = self.init_parser() - res = parser.infix_to_postfix(context, expression) + res = parser.infix_to_postfix(context, ParserInput(expression)) assert len(res) == len(expected_sequences) for res_i, expected in zip(res, expected_sequences): @@ -224,7 +225,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): } sheerka, context, parser = self.init_parser(concepts_map, create_new=True, init_from_sheerka=True) - res = parser.infix_to_postfix(context, expression) + res = parser.infix_to_postfix(context, ParserInput(expression)) assert len(res) == len(expected_sequences) for res_i, expected in zip(res, expected_sequences): @@ -248,7 +249,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): def test_i_can_post_fix_simple_prefixed_concepts(self, expression, expected_sequences): sheerka, context, parser = self.init_parser() - res = parser.infix_to_postfix(context, expression) + res = parser.infix_to_postfix(context, ParserInput(expression)) assert len(res) == len(expected_sequences) for res_i, expected in zip(res, expected_sequences): @@ -291,7 +292,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): } sheerka, context, parser = self.init_parser(concepts_map, create_new=True, init_from_sheerka=True) - res = parser.infix_to_postfix(context, expression) + res = parser.infix_to_postfix(context, ParserInput(expression)) assert len(res) == len(expected_sequences) for res_i, expected in zip(res, expected_sequences): @@ -315,7 +316,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): def test_i_can_post_fix_simple_suffixed_concepts(self, expression, expected_sequences): sheerka, context, parser = self.init_parser() - res = parser.infix_to_postfix(context, expression) + res = parser.infix_to_postfix(context, ParserInput(expression)) assert len(res) == len(expected_sequences) for res_i, expected in zip(res, expected_sequences): @@ -337,7 +338,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): } sheerka, context, parser = self.init_parser(concepts_map, None) - res = parser.infix_to_postfix(context, expression) + res = parser.infix_to_postfix(context, ParserInput(expression)) expected_array = compute_expected_array(concepts_map, expression, expected) assert len(res) == 1 assert res[0].out == expected_array @@ -377,7 +378,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): sheerka, context, parser = self.init_parser() - res = parser.infix_to_postfix(context, expression) + res = parser.infix_to_postfix(context, ParserInput(expression)) assert len(res) == len(expected_sequences) for res_i, expected in zip(res, expected_sequences): @@ -409,7 +410,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): } sheerka, context, parser = self.init_parser(concepts_map, create_new=True, init_from_sheerka=True) - res = parser.infix_to_postfix(context, expression) + res = parser.infix_to_postfix(context, ParserInput(expression)) assert len(res) == len(expected_sequences) for res_i, expected in zip(res, expected_sequences): @@ -430,7 +431,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): } sheerka, context, parser = self.init_parser(concepts_map, None) - res = parser.infix_to_postfix(context, expression) + res = parser.infix_to_postfix(context, ParserInput(expression)) expected_array = compute_expected_array(concepts_map, expression, expected) assert len(res) == 1 @@ -453,7 +454,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): } sheerka, context, parser = self.init_parser(concepts_map, sya_def) - res = parser.infix_to_postfix(context, expression) + res = parser.infix_to_postfix(context, ParserInput(expression)) expected_array = compute_expected_array(concepts_map, expression, expected) assert len(res) == 1 @@ -468,7 +469,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): def test_i_can_post_fix_binary_with_precedence(self, expression, expected): sheerka, context, parser = self.init_parser() - res = parser.infix_to_postfix(context, expression) + res = parser.infix_to_postfix(context, ParserInput(expression)) expected_array = compute_expected_array(cmap, expression, expected) assert len(res) == 1 @@ -489,7 +490,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): expression = "suffixed a prefixed" expected = ["a", "prefixed", "suffixed"] - res = parser.infix_to_postfix(context, expression) + res = parser.infix_to_postfix(context, ParserInput(expression)) expected_array = compute_expected_array(concepts_map, expression, expected) assert len(res) == 1 @@ -504,7 +505,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): expression = "suffixed a prefixed" expected = ["a", "suffixed", "prefixed"] - res = parser.infix_to_postfix(context, expression) + res = parser.infix_to_postfix(context, ParserInput(expression)) expected_array = compute_expected_array(concepts_map, expression, expected) assert len(res) == 1 @@ -524,7 +525,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): sheerka, context, parser = self.init_parser(concepts_map, sya_def) expression = "one equals two equals three" - res = parser.infix_to_postfix(context, expression) + res = parser.infix_to_postfix(context, ParserInput(expression)) expected = ["one", "two", "three", ("equals", 1), "equals"] expected_array = compute_expected_array(concepts_map, expression, expected) @@ -546,7 +547,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): sheerka, context, parser = self.init_parser(concepts_map, sya_def) expression = "one plus two plus three" - res = parser.infix_to_postfix(context, expression) + res = parser.infix_to_postfix(context, ParserInput(expression)) expected = ["one", "two", "plus", "three", ("plus", 1)] expected_array = compute_expected_array(concepts_map, expression, expected) @@ -576,7 +577,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): } sheerka, context, parser = self.init_parser(concepts_map, sya_def) - res = parser.infix_to_postfix(context, expression) + res = parser.infix_to_postfix(context, ParserInput(expression)) expected_array = compute_expected_array(concepts_map, expression, expected) assert len(res) == 1 assert res[0].out == expected_array @@ -603,7 +604,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): } sheerka, context, parser = self.init_parser(concepts_map, sya_def) - res = parser.infix_to_postfix(context, expression) + res = parser.infix_to_postfix(context, ParserInput(expression)) expected_array = compute_expected_array(concepts_map, expression, expected) assert len(res) == 1 assert res[0].out == expected_array @@ -617,7 +618,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): sheerka, context, parser = self.init_parser(concepts_map, None) expression = "foo bar baz" - res = parser.infix_to_postfix(context, expression) + res = parser.infix_to_postfix(context, ParserInput(expression)) expected_sequences = [ [UTN("bar "), "foo", "baz"], ["baz", "foo bar"] @@ -664,7 +665,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): def test_i_can_pos_fix_when_parenthesis(self, expression, expected): sheerka, context, parser = self.init_parser() - res = parser.infix_to_postfix(context, expression) + res = parser.infix_to_postfix(context, ParserInput(expression)) expected_array = compute_expected_array(cmap, expression, expected) assert len(res) == 1 @@ -719,7 +720,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): def test_i_can_post_fix_when_parenthesis_and_unknown(self, expression, expected_sequences): sheerka, context, parser = self.init_parser() - res = parser.infix_to_postfix(context, expression) + res = parser.infix_to_postfix(context, ParserInput(expression)) assert len(res) == len(expected_sequences) for res_i, expected in zip(res, expected_sequences): @@ -745,7 +746,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): def test_i_can_detect_parenthesis_mismatch_error_when_post_fixing(self, expression, expected): sheerka, context, parser = self.init_parser() - res = parser.infix_to_postfix(context, expression) + res = parser.infix_to_postfix(context, ParserInput(expression)) assert len(res) == 1 assert res[0].errors == [expected] @@ -756,7 +757,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): def test_i_can_detected_when_too_many_parameters(self, expression, expected): sheerka, context, parser = self.init_parser(cmap, None) - res = parser.infix_to_postfix(context, expression) + res = parser.infix_to_postfix(context, ParserInput(expression)) assert len(res) == 1 assert len(res[0].errors) == 1 @@ -790,7 +791,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): def test_i_can_post_fix_sequences(self, expression, expected): sheerka, context, parser = self.init_parser() - res = parser.infix_to_postfix(context, expression) + res = parser.infix_to_postfix(context, ParserInput(expression)) expected_array = compute_expected_array(cmap, expression, expected) assert len(res) == 1 @@ -808,7 +809,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): sheerka, context, parser = self.init_parser() expected = ["one", "two", "three", "?"] - res = parser.infix_to_postfix(context, expression) + res = parser.infix_to_postfix(context, ParserInput(expression)) expected_array = compute_expected_array(cmap, expression, expected) assert len(res) == 1 @@ -830,7 +831,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): sheerka, context, parser = self.init_parser(concepts_map, sya_def) expression = "a plus plus equals b" - res = parser.infix_to_postfix(context, expression) + res = parser.infix_to_postfix(context, ParserInput(expression)) expected_array = tests.parsers.parsers_utils.compute_debug_array(res) assert expected_array == [ ["T(a)", "C(a plus b)", "C(a plus b)", "T(equals)", "T(b)"], @@ -859,9 +860,9 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): sheerka, context, parser = self.init_parser(concepts_map, sya_def) expression = "a plus complex infix b" - res = parser.infix_to_postfix(context, expression) + res = parser.infix_to_postfix(context, ParserInput(expression)) - res = parser.parse(context, expression) + res = parser.parse(context, ParserInput(expression)) pass def test_i_can_use_string_instead_of_identifier(self): @@ -874,7 +875,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): sheerka, context, parser = self.init_parser(concepts_map, None) - res = parser.infix_to_postfix(context, "one ? ? two '::' three") + res = parser.infix_to_postfix(context, ParserInput("one ? ? two '::' three")) assert len(res) == 1 assert res[0].out == [ cnode("one", start=0, end=0, source="one"), @@ -896,7 +897,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): sheerka, context, parser = self.init_parser(concepts_map, sya_def) - res = parser.infix_to_postfix(context, "one less than two less than three") + res = parser.infix_to_postfix(context, ParserInput("one less than two less than three")) assert len(res) == 1 assert res[0].errors == [NoneAssociativeSequenceErrorNode(concepts_map["less than"], 2, 8)] @@ -909,7 +910,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): sheerka, context, parser = self.init_parser() expression = "suffixed twenties" - res = parser.infix_to_postfix(context, expression) + res = parser.infix_to_postfix(context, ParserInput(expression)) expected = [cnode("twenties", 2, 2, "twenties"), "suffixed"] expected_array = compute_expected_array(cmap, expression, expected) @@ -920,7 +921,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): sheerka, context, parser = self.init_parser() text = "one plus two mult three" - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) wrapper = res.body lexer_nodes = res.body.body @@ -939,7 +940,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): sheerka, context, parser = self.init_parser() text = "suffixed 1 + 1" - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) wrapper = res.body lexer_nodes = res.body.body @@ -962,7 +963,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): sheerka, context, parser = self.init_parser() text = "suffixed twenty one" - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) assert len(res) == 2 assert res[1].status @@ -981,7 +982,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): sheerka, context, parser = self.init_parser() text = "one plus 1 + 1 suffixed two" - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) wrapper = res.body lexer_nodes = res.body.body @@ -1020,7 +1021,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): def test_i_can_parse_when_one_result(self, text, expected_status, expected_result): sheerka, context, parser = self.init_parser() - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) wrapper = res.body lexer_nodes = res.body.body @@ -1043,7 +1044,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): """ sheerka, context, parser = self.init_parser() - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) assert not res.status assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) @@ -1068,7 +1069,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): """ sheerka, context, parser = self.init_parser() - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) wrapper = res.body lexer_nodes = res.body.body @@ -1088,7 +1089,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): def test_i_cannot_parse_when_unrecognized(self, text, expected_concept, expected_unrecognized): sheerka, context, parser = self.init_parser() - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) wrapper = res.body lexer_nodes = res.body.body expected_end = len(list(Tokenizer(text))) - 2 @@ -1108,7 +1109,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): def test_i_cannot_parse_when_part_of_the_sequence_is_not_recognized(self, text, expected): sheerka, context, parser = self.init_parser() - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) wrapper = res.body lexer_nodes = res.body.body @@ -1132,7 +1133,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): """ sheerka, context, parser = self.init_parser() - res = parser.parse(context, text) + res = parser.parse(context, ParserInput(text)) assert not res.status assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) @@ -1141,7 +1142,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): def test_i_cannot_parse_empty_string(self): sheerka, context, parser = self.init_parser({}, None) - res = parser.parse(context, "") + res = parser.parse(context, ParserInput("")) assert not res.status assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY) diff --git a/tests/parsers/test_UnrecognizedNodeParser.py b/tests/parsers/test_UnrecognizedNodeParser.py index 22fe8e3..0f0e8b4 100644 --- a/tests/parsers/test_UnrecognizedNodeParser.py +++ b/tests/parsers/test_UnrecognizedNodeParser.py @@ -135,7 +135,7 @@ class TestUnrecognizedNodeParser(TestUsingMemoryBasedSheerka): assert sheerka.isinstance(concept.compiled["d"][0], BuiltinConcepts.RETURN_VALUE) assert concept.compiled["d"][0].status assert concept.compiled["d"][0].who == "parsers.Python" - assert concept.compiled["d"][0].body.source == "1 + 2" + assert concept.compiled["d"][0].body.source == " 1 + 2 " assert len(concept.compiled["e"]) == 1 assert sheerka.isinstance(concept.compiled["e"][0], BuiltinConcepts.RETURN_VALUE) @@ -171,13 +171,13 @@ class TestUnrecognizedNodeParser(TestUsingMemoryBasedSheerka): assert len(res.body.concept.compiled["a"]) == 1 assert res.body.concept.compiled["a"][0].status assert res.body.concept.compiled["a"][0].who == "parsers.Python" - assert res.body.concept.compiled["a"][0].body.source == "1" + assert res.body.concept.compiled["a"][0].body.source == "1 " assert res.body.concept.compiled["b"] == concepts_map["mult"] assert sheerka.isinstance(res.body.concept.compiled["b"].compiled["a"][0], BuiltinConcepts.RETURN_VALUE) assert res.body.concept.compiled["b"].compiled["a"][0].status assert res.body.concept.compiled["b"].compiled["a"][0].who == "parsers.Python" - assert res.body.concept.compiled["b"].compiled["a"][0].body.source == "2" + assert res.body.concept.compiled["b"].compiled["a"][0].body.source == " 2 " assert sheerka.isinstance(res.body.concept.compiled["b"].compiled["b"][0], BuiltinConcepts.RETURN_VALUE) assert res.body.concept.compiled["b"].compiled["b"][0].status