Introduced ParserInput

This commit is contained in:
2020-05-25 18:09:12 +02:00
parent c79403443f
commit 479461c0a4
35 changed files with 768 additions and 480 deletions
+6 -6
View File
@@ -226,28 +226,28 @@ def only_parsers_results(context, return_values):
parents=return_values)
def parse_unrecognized(context, tokens, parsers):
def parse_unrecognized(context, source, parsers):
"""
Try to recognize concepts or code from tokens using the given parsers
Try to recognize concepts or code from source using the given parsers
:param context:
:param tokens:
:param source:
:param parsers:
:return:
"""
steps = [BuiltinConcepts.BEFORE_PARSING, BuiltinConcepts.PARSING, BuiltinConcepts.AFTER_PARSING]
sheerka = context.sheerka
with context.push(desc=f"Parsing unrecognized '{tokens}'") as sub_context:
with context.push(desc=f"Parsing unrecognized '{source}'") as sub_context:
# disable all parsers but the following ones
sub_context.add_preprocess(BaseParser.PREFIX + "*", enabled=False)
for parser in parsers:
sub_context.add_preprocess(BaseParser.PREFIX + parser, enabled=True)
sub_context.add_inputs(source=tokens)
sub_context.add_inputs(source=source)
to_parse = sheerka.ret(
context.who,
True,
sheerka.new(BuiltinConcepts.USER_INPUT, body=tokens))
sheerka.new(BuiltinConcepts.USER_INPUT, body=source))
res = sheerka.execute(sub_context, to_parse, steps)
sub_context.add_values(return_values=res)
+157 -2
View File
@@ -1,23 +1,178 @@
import core.utils
from cache.Cache import Cache
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
from core.sheerka.services.sheerka_service import BaseService
from core.tokenizer import Tokenizer, TokenKind, Keywords, Token
NO_MATCH = "** No Match **"
class ParserInput:
    """
    Helper class that tokenizes the input once and for all.

    It holds the source text, its tokens (computed lazily by reset() when they
    are not supplied) and a cursor (pos / token) used to walk the tokens
    between ``start`` (inclusive) and ``end`` (exclusive).
    """

    def __init__(self, text, tokens=None, start=None, end=None, yield_oef=True):
        """
        :param text: source text
        :param tokens: already tokenized version of the text, if available
        :param start: index of the first token to consider (defaults to 0)
        :param end: index of the last token to consider, inclusive
                    (None means 'up to the last token')
        :param yield_oef: when True, a synthetic EOF token is exposed once the
                          end of the input is reached
        """
        self.text = text
        self.tokens = tokens or None
        self.length = len(tokens) if tokens else None
        self.yield_oef = yield_oef

        self.start = start or 0
        # 'end' is given inclusive but stored exclusive.
        # Compare with None: end=0 is a valid value (only the first token).
        self.end = end + 1 if end is not None else None

        self.sub_text = None  # cache for as_text()
        self.sub_tokens = None  # cache for as_tokens()

        self.pos = None
        self.token = None
        self.from_tokens = tokens is not None

    def __repr__(self):
        from_tokens = "from_tokens" if self.from_tokens else ""
        return f"ParserInput({from_tokens}'{self.text}')"

    def reset(self, yield_oef=True):
        """
        Prepare the input for a new parsing: tokenize the text if needed and
        put the cursor just before the first token
        :param yield_oef: see __init__
        :return: self
        """
        if self.tokens is None:
            self.tokens = list(Tokenizer(self.text))
            self.length = len(self.tokens)

        if self.end is None:
            self.end = self.length

        self.yield_oef = yield_oef
        self.pos = self.start - 1  # next_token() will move to 'start'
        self.token = None
        return self

    def as_text(self, custom_switcher=None, tracker=None):
        """
        Text of the [start, end[ range
        :param custom_switcher: see get_text_from_tokens (result is not cached when set)
        :param tracker: see get_text_from_tokens
        :return:
        """
        if custom_switcher is not None:
            return self.get_text_from_tokens(self.as_tokens(), custom_switcher, tracker)

        if self.sub_text:
            return self.sub_text

        if self.start == 0 and self.end == self.length:
            # the whole input is requested, no need to rebuild the text
            self.sub_text = self.text
        else:
            self.sub_text = self.get_text_from_tokens(self.tokens[self.start:self.end])
        return self.sub_text

    def as_tokens(self):
        """
        Tokens of the [start, end[ range
        :return:
        """
        if self.sub_tokens:
            return self.sub_tokens

        if self.start == 0 and self.end == self.length:
            self.sub_tokens = self.tokens
        else:
            self.sub_tokens = self.tokens[self.start:self.end]
        return self.sub_tokens

    def _end_reached(self):
        """
        Common handling of 'no more token': exposes a synthetic EOF token
        when yield_oef is set
        :return: always False
        """
        if self.yield_oef:
            self.token = Token(TokenKind.EOF, "", -1, -1, -1)
        return False

    def next_token(self, skip_whitespace=True):
        """
        Move the cursor to the next token
        :param skip_whitespace: when True, whitespaces and newlines are skipped
        :return: True if self.token is a token to process, False at end of input
        """
        self.pos += 1
        if self.pos >= self.end:
            return self._end_reached()

        self.token = self.tokens[self.pos]

        if skip_whitespace:
            while self.token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
                self.pos += 1
                if self.pos >= self.end:
                    # same contract as when the end is reached directly
                    return self._end_reached()
                self.token = self.tokens[self.pos]

        # checked after the whitespace skipping, as the EOF token may be
        # reached through it
        if self.token.type == TokenKind.EOF and not self.yield_oef:
            return False

        return True

    def is_empty(self):
        """
        :return: True when there is nothing to parse (blank text, empty range
                 or range made of a single whitespace token)
        """
        if self.text.strip() == "":
            return True

        if self.end == self.start:
            return True

        # tokens may not be available yet (reset() not called)
        if self.tokens is not None and \
                self.end and \
                self.end == self.start + 1 and \
                self.tokens[self.start].type == TokenKind.WHITESPACE:
            return True

        return False

    @staticmethod
    def get_text_from_tokens(tokens, custom_switcher=None, tracker=None):
        """
        Create the source code, from the list of token
        :param tokens: list of tokens (a single token is also accepted)
        :param custom_switcher: to override the behaviour (the return value) of some token
        :param tracker: keep track of the original token value when custom switched
        :return:
        """
        if tokens is None:
            return ""

        if not hasattr(tokens, "__iter__"):
            tokens = [tokens]

        switcher = {
            TokenKind.KEYWORD: lambda t: Keywords(t.value).value,
            TokenKind.CONCEPT: lambda t: core.utils.str_concept(t.value),
        }
        if custom_switcher:
            switcher.update(custom_switcher)

        def default(t):
            return t.value

        parts = []
        for token in tokens:
            value = switcher.get(token.type, default)(token)
            parts.append(value)
            # custom_switcher may be None while a tracker is given
            if tracker is not None and custom_switcher and token.type in custom_switcher:
                tracker[value] = token.value
        return "".join(parts)
class SheerkaExecute(BaseService):
"""
Manage the execution of a process flow
"""
NAME = "Execute"
PARSERS_INPUTS_ENTRY = "ParserInput" # entry for admin or internal variables
def __init__(self, sheerka):
    super().__init__(sheerka)
    # cache of ParserInput, only created by initialize()
    self.pi_cache = None
def initialize(self):
    """
    Bind 'execute' as a sheerka service method and create the ParserInput cache
    """
    self.sheerka.bind_service_method(self.execute)
    # a missing key is resolved by building a ParserInput from the key itself
    self.pi_cache = Cache(default=lambda key: ParserInput(key), max_size=20)
    # NOTE(review): meaning of the last positional argument (False) is defined by register_cache -- confirm
    self.sheerka.cache_manager.register_cache(self.PARSERS_INPUTS_ENTRY, self.pi_cache, False)
def get_parser_input(self, text, tokens=None):
    """
    Returns new or existing parser input
    :param text: source text, or directly a ParserInput (returned as is)
    :param tokens: tokens of the text, when already known.
                   When given, text may be None: the cache key is then rebuilt from the tokens
    :return: the ParserInput to use
    """
    if isinstance(text, ParserInput):
        return text

    cache = self.pi_cache

    # tokens are known and nothing is cached for this text: register a new entry
    if tokens is not None and not cache.has(text):
        key = text or ParserInput.get_text_from_tokens(tokens)
        created = ParserInput(key, tokens)
        cache.put(key, created)
        return created

    found = cache.get(text)
    if found is None:  # when CacheManager.cache_only is True
        found = ParserInput(text)
        cache.put(text, found)
    return found
def call_parsers(self, context, return_values):
# return_values must be a list
@@ -56,7 +211,7 @@ class SheerkaExecute(BaseService):
for return_value in inputs_for_this_group:
to_parse = return_value.body.body \
to_parse = self.get_parser_input(return_value.body.body) \
if self.sheerka.isinstance(return_value.body, BuiltinConcepts.USER_INPUT) \
else return_value.body
+3
View File
@@ -104,6 +104,9 @@ class Token:
else:
return str(self.value)
def clone(self):
    """
    Returns a new Token built from the type, value, index, line and column of this one
    """
    return Token(self.type, self.value, self.index, self.line, self.column)
@dataclass()
class LexerError(Exception):
+3 -1
View File
@@ -1,5 +1,6 @@
import core.builtin_helpers
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
from core.sheerka.services.SheerkaExecute import SheerkaExecute
from evaluators.BaseEvaluator import OneReturnValueEvaluator
from parsers.DefaultParser import IsaConceptNode
@@ -28,10 +29,11 @@ class AddConceptInSetEvaluator(OneReturnValueEvaluator):
def eval(self, context, return_value):
def _resolve(name_node):
parser_input = context.sheerka.services[SheerkaExecute.NAME].get_parser_input(None, name_node.tokens)
ret_val = sheerka.ret(
self.name,
True,
sheerka.new(BuiltinConcepts.USER_INPUT, body=name_node.tokens, user_name="N/A"))
sheerka.new(BuiltinConcepts.USER_INPUT, body=parser_input, user_name="N/A"))
with context.push(desc=f"Recognizing '{name_node}'") as sub_context:
r = sheerka.execute(sub_context, ret_val, ALL_STEPS)
+1 -1
View File
@@ -7,7 +7,7 @@ from parsers.PythonParser import LexerNodeParserHelperForPython, PythonNode
class LexerNodeEvaluator(OneReturnValueEvaluator):
"""
After a BNF is recognized, generates the concept or the list concepts
Evaluate a list of LexerNode (ConceptNode | SourceCodeNode | UnrecognizedTokenNode...)
"""
NAME = "LexerNode"
+21 -15
View File
@@ -3,6 +3,7 @@ from dataclasses import dataclass
from core import builtin_helpers
from core.builtin_concepts import BuiltinConcepts
from core.concept import DEFINITION_TYPE_BNF, Concept
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Tokenizer
from core.utils import strip_tokens
from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensNode, SourceCodeNode
@@ -250,26 +251,27 @@ class AtomNodeParser(BaseNodeParser):
concept_parser_helpers = [AtomConceptParserHelper(self.context)]
while self.next_token(False):
while self.parser_input.next_token(False):
for concept_parser in concept_parser_helpers:
concept_parser.reset()
token = self.token
token = self.parser_input.token
pos = self.parser_input.pos
try:
for concept_parser in concept_parser_helpers:
if concept_parser.eat_token(self.token, self.pos):
if concept_parser.eat_token(token, pos):
concept_parser.lock()
concepts = self.get_concepts(token, self._is_eligible, custom=_get_concepts_by_name)
if not concepts:
for concept_parser in concept_parser_helpers:
concept_parser.eat_unrecognized(token, self.pos)
concept_parser.eat_unrecognized(token, pos)
continue
if len(concepts) == 1:
for concept_parser in concept_parser_helpers:
concept_parser.eat_concept(concepts[0], self.pos)
concept_parser.eat_concept(concepts[0], pos)
continue
# make the cartesian product
@@ -284,7 +286,7 @@ class AtomNodeParser(BaseNodeParser):
for concept in concepts:
clone = concept_parser.clone()
temp_res.append(clone)
clone.eat_concept(concept, self.pos)
clone.eat_concept(concept, pos)
concept_parser_helpers = temp_res
finally:
@@ -298,22 +300,26 @@ class AtomNodeParser(BaseNodeParser):
return concept_parser_helpers
def get_by_name(self, parser_input):
def get_by_name(self):
"""
Try to recognize the full parser input as a concept name
:return:
"""
source = self.get_input_as_text(parser_input)
source = self.parser_input.as_text()
concepts = self.sheerka.get_by_name(source.strip())
if not self.sheerka.is_known(concepts):
return None
concepts = [concepts] if isinstance(concepts, Concept) else concepts
res = []
start, end = self.get_tokens_boundaries(self.tokens)
start, end = self.get_tokens_boundaries(self.parser_input.as_tokens())
for concept in concepts:
parser_helper = AtomConceptParserHelper(None)
parser_helper.sequence.append(ConceptNode(concept, start, end, strip_tokens(self.tokens, True), source))
parser_helper.sequence.append(ConceptNode(
concept,
start,
end,
strip_tokens(self.parser_input.as_tokens(), True), source))
res.append(parser_helper)
return res
@@ -331,7 +337,7 @@ class AtomNodeParser(BaseNodeParser):
if isinstance(node, ConceptNode):
if len(node.concept.metadata.variables) > 0:
node.concept.metadata.is_evaluated = True # Do not try to evaluate those concepts
node.tokens = self.tokens[node.start:node.end + 1]
node.tokens = self.parser_input.tokens[node.start:node.end + 1]
node.fix_source()
if parser_helper in valid_parser_helpers:
@@ -341,8 +347,8 @@ class AtomNodeParser(BaseNodeParser):
return valid_parser_helpers
def parse(self, context, parser_input):
if parser_input == "":
def parse(self, context, parser_input: ParserInput):
if parser_input.is_empty():
return context.sheerka.ret(
self.name,
False,
@@ -356,7 +362,7 @@ class AtomNodeParser(BaseNodeParser):
context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
sequences = self.get_concepts_sequences()
if by_name := self.get_by_name(parser_input):
if by_name := self.get_by_name():
sequences.extend(by_name)
parser_helpers = self.get_valid(sequences)
@@ -386,4 +392,4 @@ class AtomNodeParser(BaseNodeParser):
return self.sheerka.ret(
self.name,
False,
context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input))
context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.as_text()))
+41 -37
View File
@@ -5,7 +5,7 @@ from enum import Enum
import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF, ConceptParts
from core.sheerka.ExecutionContext import ExecutionContext
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind, LexerError, Token, Keywords
from parsers.BaseParser import Node, BaseParser, ErrorNode
@@ -86,7 +86,6 @@ class UnrecognizedTokensNode(LexerNode):
else:
self.end -= 1
def has_open_paren(self):
return self.parenthesis_count > 0
@@ -598,13 +597,13 @@ class BaseNodeParser(BaseParser):
else:
self.concepts_by_first_keyword = None
self.token = None
self.pos = -1
self.tokens = None
self.context: ExecutionContext = None
self.text = None
self.sheerka = None
# self.token = None
# self.pos = -1
# self.tokens = None
#
# self.context: ExecutionContext = None
# self.text = None
# self.sheerka = None
def init_from_concepts(self, context, concepts, **kwargs):
"""
@@ -617,43 +616,48 @@ class BaseNodeParser(BaseParser):
concepts_by_first_keyword = self.get_concepts_by_first_keyword(context, concepts).body
self.concepts_by_first_keyword = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword).body
def reset_parser(self, context, text):
def reset_parser(self, context, parser_input: ParserInput):
self.context = context
self.sheerka = context.sheerka
self.text = text
self.parser_input = parser_input
try:
self.tokens = list(self.get_input_as_tokens(text))
self.parser_input.reset(False)
except LexerError as e:
self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
return False
self.token = None
self.pos = -1
return True
# self.text = text
#
# try:
# self.tokens = list(self.get_input_as_tokens(text))
#
#
# self.token = None
# self.pos = -1
# return True
def add_error(self, error, next_token=True):
self.error_sink.append(error)
if next_token:
self.next_token()
return error
# def add_error(self, error, next_token=True):
# self.error_sink.append(error)
# if next_token:
# self.parser_input.next_token()
# return error
def get_token(self) -> Token:
return self.token
def next_token(self, skip_whitespace=True):
if self.token and self.token.type == TokenKind.EOF:
return False
self.pos += 1
self.token = self.tokens[self.pos]
if skip_whitespace:
while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
self.pos += 1
self.token = self.tokens[self.pos]
return self.token.type != TokenKind.EOF
# def get_token(self) -> Token:
# return self.token
#
# def next_token(self, skip_whitespace=True):
# if self.token and self.token.type == TokenKind.EOF:
# return False
#
# self.pos += 1
# self.token = self.tokens[self.pos]
#
# if skip_whitespace:
# while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
# self.pos += 1
# self.token = self.tokens[self.pos]
#
# return self.token.type != TokenKind.EOF
def get_concepts(self, token, to_keep, custom=None, to_map=None, strip_quotes=False):
"""
+28
View File
@@ -9,6 +9,25 @@ from core.sheerka_logger import get_logger
from core.tokenizer import TokenKind, Keywords, Token, Tokenizer
# # keep a cache for the parser input
# pi_cache = Cache(default=lambda key: ParserInput(key), max_size=20)
#
#
# def get_parser_input(text, tokens=None, length=None):
# """
# Returns new or existing parser input
# :param text:
# :param tokens:
# :param length:
# :return:
# """
# if tokens is None or pi_cache.has(text):
# return pi_cache.get(text)
# pi = ParserInput(text, tokens, length)
# pi_cache.put(text, pi)
# return pi
@dataclass()
class Node:
pass
@@ -84,6 +103,9 @@ class BaseParser:
self.enabled = enabled
self.error_sink = []
self.context: ExecutionContext = None
self.sheerka = None
self.parser_input: ParserInput = None
def __eq__(self, other):
if not isinstance(other, self.__class__):
@@ -99,6 +121,12 @@ class BaseParser:
def parse(self, context, parser_input):
pass
def add_error(self, error, next_token=True):
    """
    Record an error in the error sink
    :param error: error to record
    :param next_token: when True, also moves the parser input to its next token
    :return: the error that was given
    """
    self.error_sink.append(error)
    if next_token:
        # NOTE(review): requires self.parser_input to be set (it is initialised to None) -- confirm callers
        self.parser_input.next_token()
    return error
@property
def has_error(self):
return len(self.error_sink) > 0
+36 -26
View File
@@ -14,6 +14,7 @@ from cache.Cache import Cache
from core import builtin_helpers
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, DEFINITION_TYPE_BNF, DoNotResolve, ConceptParts
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Tokenizer, Token, TokenKind
from parsers.BaseNodeParser import BaseNodeParser, LexerNode, UnrecognizedTokensNode, ConceptNode, GrammarErrorNode
from parsers.BaseParser import ErrorNode
@@ -149,7 +150,7 @@ class ConceptExpression(ParsingExpression):
return NonTerminalNode(self,
node.start,
node.end,
parser_helper.parser.tokens[node.start: node.end + 1],
parser_helper.parser.parser_input.tokens[node.start: node.end + 1],
[node])
@@ -184,7 +185,11 @@ class Sequence(ParsingExpression):
children.append(node)
end_pos = node.end
return NonTerminalNode(self, init_pos, end_pos, parser_helper.parser.tokens[init_pos: end_pos + 1], children)
return NonTerminalNode(self,
init_pos,
end_pos,
parser_helper.parser.parser_input.tokens[init_pos: end_pos + 1],
children)
def __repr__(self):
to_str = ", ".join(repr(n) for n in self.elements)
@@ -206,7 +211,7 @@ class OrderedChoice(ParsingExpression):
return NonTerminalNode(self,
init_pos,
node.end,
parser_helper.parser.tokens[init_pos: node.end + 1],
parser_helper.parser.parser_input.tokens[init_pos: node.end + 1],
[node])
parser_helper.seek(init_pos) # backtrack
@@ -237,7 +242,7 @@ class Optional(ParsingExpression):
self,
node.start,
node.end,
parser_helper.parser.tokens[node.start: node.end + 1],
parser_helper.parser.parser_input.tokens[node.start: node.end + 1],
[node])
parser_helper.seek(init_pos) # backtrack
@@ -303,7 +308,8 @@ class ZeroOrMore(Repetition):
if len(children) == 0:
return NonTerminalNode(self, init_pos, -1, [], [])
return NonTerminalNode(self, init_pos, end_pos, parser_helper.parser.tokens[init_pos: end_pos + 1], children)
return NonTerminalNode(self, init_pos, end_pos, parser_helper.parser.parser_input.tokens[init_pos: end_pos + 1],
children)
def __repr__(self):
to_str = ", ".join(repr(n) for n in self.elements)
@@ -343,7 +349,11 @@ class OneOrMore(Repetition):
if len(children) == 0: # if nothing is found, it's an error
return None
return NonTerminalNode(self, init_pos, end_pos, parser_helper.parser.tokens[init_pos: end_pos + 1], children)
return NonTerminalNode(self,
init_pos,
end_pos,
parser_helper.parser.parser_input.tokens[init_pos: end_pos + 1],
children)
def __repr__(self):
to_str = ", ".join(repr(n) for n in self.elements)
@@ -507,24 +517,24 @@ class BnfConceptParserHelper:
return False
self.pos += 1
self.token = self.parser.tokens[self.pos]
self.token = self.parser.parser_input.tokens[self.pos]
if skip_whitespace:
while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
self.pos += 1
self.token = self.parser.tokens[self.pos]
self.token = self.parser.parser_input.tokens[self.pos]
return self.token.type != TokenKind.EOF
def seek(self, pos):
self.pos = pos
self.token = self.parser.tokens[self.pos]
self.token = self.parser.parser_input.tokens[self.pos]
def has_error(self):
return len(self.errors) > 0
def is_locked(self):
return self.parser.pos <= self.pos or self.has_error()
return self.parser.parser_input.pos <= self.pos or self.has_error()
def eat_concept(self, concept, token):
if self.is_locked():
@@ -546,8 +556,8 @@ class BnfConceptParserHelper:
self.errors.append(GrammarErrorNode(error_msg))
return
self.pos = self.parser.pos
self.token = self.parser.tokens[self.pos]
self.pos = self.parser.parser_input.pos
self.token = self.parser.parser_input.tokens[self.pos]
# parse
node = parsing_expression.parse(self)
@@ -557,15 +567,15 @@ class BnfConceptParserHelper:
self.bnf_parsed = True
else:
self.debug.append(("Rewind", token))
self.unrecognized_tokens.add_token(token, self.parser.pos)
self.pos = self.parser.pos # reset position
self.unrecognized_tokens.add_token(token, self.parser.parser_input.pos)
self.pos = self.parser.parser_input.pos # reset position
def eat_unrecognized(self, token):
if self.is_locked():
return
self.debug.append(token)
self.unrecognized_tokens.add_token(token, self.parser.pos)
self.unrecognized_tokens.add_token(token, self.parser.parser_input.pos)
def manage_unrecognized(self):
if self.unrecognized_tokens.is_empty():
@@ -631,7 +641,7 @@ class BnfConceptParserHelper:
concept,
underlying.start,
underlying.end,
self.parser.tokens[underlying.start: underlying.end + 1],
self.parser.parser_input.tokens[underlying.start: underlying.end + 1],
None,
underlying)
return concept_node
@@ -779,9 +789,9 @@ class BnfNodeParser(BaseNodeParser):
concept_parser_helpers = [BnfConceptParserHelper(self)]
while self.next_token(False):
while self.parser_input.next_token(False):
token = self.get_token()
token = self.parser_input.token
try:
concepts = self.get_concepts(token, self._is_eligible, strip_quotes=False)
@@ -837,7 +847,7 @@ class BnfNodeParser(BaseNodeParser):
resolved = self.resolve_parsing_expression(expression, already_seen or set())
sub_context.add_values(return_values=resolved)
self.concepts_grammars.put(concept.id, resolved)
self.concepts_grammars.put(concept.id, resolved)
if self.has_error:
return None
@@ -929,7 +939,7 @@ class BnfNodeParser(BaseNodeParser):
return self.context.concepts[concept]
return self.sheerka.get_by_key(concept)
def parse(self, context, parser_input):
def parse(self, context, parser_input: ParserInput):
"""
parser_input can be string, but text can also be an list of tokens
:param context:
@@ -940,11 +950,11 @@ class BnfNodeParser(BaseNodeParser):
context.log(f"Parsing '{parser_input}' with BnfNode", self.name)
sheerka = context.sheerka
if parser_input == "" or isinstance(parser_input, list) and len(parser_input) == 0:
if parser_input.is_empty():
return sheerka.ret(self.name,
False,
sheerka.new(BuiltinConcepts.NOT_FOR_ME,
body=parser_input,
body=parser_input.as_text(),
reason=BuiltinConcepts.IS_EMPTY))
if not self.reset_parser(context, parser_input):
@@ -966,7 +976,7 @@ class BnfNodeParser(BaseNodeParser):
return self.sheerka.ret(
self.name,
False,
context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input))
context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.as_text()))
ret = []
for parser_helper in valid_parser_helpers:
@@ -977,13 +987,13 @@ class BnfNodeParser(BaseNodeParser):
self.sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=parser_input,
source=parser_input.as_text(),
body=parser_helper.sequence,
try_parsed=parser_helper.sequence)))
if len(ret) == 1:
self.log_result(context, parser_input, ret[0])
self.log_result(context, parser_input.as_text(), ret[0])
return ret[0]
else:
self.log_multiple_results(context, parser_input, ret)
self.log_multiple_results(context, parser_input.as_text(), ret)
return ret
+4 -3
View File
@@ -115,7 +115,6 @@ class BnfParser(BaseParser):
def parse(self, context: ExecutionContext, parser_input):
tree = None
try:
self.reset_parser(context, parser_input)
tree = self.parse_choice()
@@ -124,7 +123,10 @@ class BnfParser(BaseParser):
if token and token.type != TokenKind.EOF:
self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", token, []))
except LexerError as e:
self.add_error(e, False)
return self.sheerka.ret(
self.name,
False,
context.sheerka.new(BuiltinConcepts.ERROR, body=[e]))
value = self.get_return_value_body(context.sheerka, self.source, tree, tree)
@@ -283,4 +285,3 @@ class BnfParser(BaseParser):
expression.rule_name = token.value
self.next_token()
return expression
+39 -66
View File
@@ -1,12 +1,13 @@
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, ParserResultConcept
from core.concept import ConceptParts, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF
from dataclasses import dataclass, field
import core.builtin_helpers
import core.utils
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, ParserResultConcept
from core.concept import ConceptParts, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF
from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute
from core.tokenizer import Tokenizer, TokenKind, Keywords
from parsers.BaseParser import BaseParser, Node, ErrorNode, NotInitializedNode
from core.tokenizer import Tokenizer, TokenKind, Token, Keywords
from dataclasses import dataclass, field
from parsers.BnfParser import BnfParser
from core.sheerka.Sheerka import ExecutionContext
@dataclass()
@@ -90,11 +91,10 @@ class DefConceptNode(DefaultParserNode):
asts = {}
for part_key in ConceptParts:
prop_value = getattr(self, part_key.value)
if isinstance(prop_value, ReturnValueConcept) and isinstance(prop_value.body,
ParserResultConcept) and hasattr(
prop_value.body.body, "ast_"):
if isinstance(prop_value, ReturnValueConcept) and \
isinstance(prop_value.body, ParserResultConcept) and \
hasattr(prop_value.body.body, "ast_"):
asts[part_key] = prop_value
# asts[part_key] = prop_value.body.body.ast_
return asts
@@ -111,11 +111,6 @@ class DefaultParser(BaseParser):
def __init__(self, **kwargs):
BaseParser.__init__(self, "Default", 60)
self.lexer_iter = None
self._current = None
self.context: ExecutionContext = None
self.text = None
self.sheerka = None
@staticmethod
def fix_indentation(tokens):
@@ -129,6 +124,7 @@ class DefaultParser(BaseParser):
:param tokens:
:return:
"""
tokens = tokens.copy() # do not modify ParserInput.tokens
if tokens[0].type != TokenKind.COLON:
return tokens
@@ -143,6 +139,8 @@ class DefaultParser(BaseParser):
indent_size = len(tokens[2].value)
# now fix the other indentations
# KSI 23/05/2020 Not quite sure this 'fixing' stuff is still relevant,
# as I now have an editor in interactive mode
i = 3
while i < len(tokens) - 1:
if tokens[i].type == TokenKind.NEWLINE:
@@ -152,44 +150,22 @@ class DefaultParser(BaseParser):
if len(tokens[i + 1].value) < indent_size:
return SyntaxErrorNode([tokens[i + 1]], "Invalid indentation.")
tokens[i + 1] = tokens[i + 1].clone()
tokens[i + 1].value = " " * (len(tokens[i + 1].value) - indent_size)
i += 1
return tokens[3:]
def reset_parser(self, context, text):
def reset_parser(self, context, parser_input):
self.context = context
self.sheerka = context.sheerka
self.parser_input = parser_input
self.parser_input.reset()
self.parser_input.next_token()
self.text = text
self.lexer_iter = iter(Tokenizer(text))
self._current = None
self.next_token()
def add_error(self, error, next_token=True):
self.error_sink.append(error)
if next_token:
self.next_token()
return error
def get_token(self) -> Token:
return self._current
def next_token(self, skip_whitespace=True):
try:
self._current = next(self.lexer_iter)
if skip_whitespace:
while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
self._current = next(self.lexer_iter)
except StopIteration:
self._current = None
return
def parse(self, context, parser_input):
def parse(self, context, parser_input: ParserInput):
# default parser can only manage string text
if not isinstance(parser_input, str):
if parser_input.from_tokens:
ret = context.sheerka.ret(
self.name,
False,
@@ -197,12 +173,14 @@ class DefaultParser(BaseParser):
self.log_result(context, parser_input, ret)
return ret
tree = None
try:
self.reset_parser(context, parser_input)
tree = self.parse_statement()
except core.tokenizer.LexerError as e:
self.add_error(e, False)
return self.sheerka.ret(
self.name,
False,
context.sheerka.new(BuiltinConcepts.ERROR, body=[e]))
# If a error is found it must be sent to error_sink
# tree must contain what was recognized
@@ -210,26 +188,20 @@ class DefaultParser(BaseParser):
if self.has_error and isinstance(self.error_sink[0], CannotHandleErrorNode):
body = self.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=self.error_sink)
else:
body = self.get_return_value_body(context.sheerka, parser_input, tree, tree)
# body = self.sheerka.new(
# BuiltinConcepts.PARSER_RESULT,
# parser=self,
# source=text,
# body=self.error_sink if self.has_error else tree,
# try_parsed=tree)
body = self.get_return_value_body(context.sheerka, parser_input.as_text(), tree, tree)
ret = self.sheerka.ret(
self.name,
not self.has_error,
body)
self.log_result(context, parser_input, ret)
self.log_result(context, parser_input.as_text(), ret)
return ret
def parse_statement(self):
token = self.get_token()
token = self.parser_input.token
if token.value == Keywords.DEF:
self.next_token()
self.parser_input.next_token()
self.context.log("Keyword DEF found.", self.name)
return self.parse_def_concept(token)
else:
@@ -282,23 +254,23 @@ class DefaultParser(BaseParser):
return concept_name
keyword = []
token = self.get_token()
token = self.parser_input.token
if token.value != Keywords.ISA:
return self.add_error(CannotHandleErrorNode([token], ""))
keyword.append(token)
self.next_token()
self.parser_input.next_token()
set_name = self.parse_concept_name()
return IsaConceptNode(keyword, concept_name, set_name)
def parse_concept_name(self):
tokens = []
token = self.get_token()
token = self.parser_input.token
while not (token.type == TokenKind.EOF or token.type == TokenKind.KEYWORD):
tokens.append(token)
self.next_token()
token = self.get_token()
self.parser_input.next_token()
token = self.parser_input.token
if len(tokens) == 0:
return self.add_error(UnexpectedTokenErrorNode([token], "Unexpected token", []))
@@ -319,7 +291,7 @@ class DefaultParser(BaseParser):
Keywords.POST: None,
}
current_part = Keywords.CONCEPT
token = self.get_token()
token = self.parser_input.token
first_token = token
# loop thru the tokens, and put them in the correct tokens_found_by_parts entry
@@ -334,18 +306,18 @@ class DefaultParser(BaseParser):
else:
tokens_found_by_parts[keyword] = [token]
current_part = keyword
self.next_token()
self.parser_input.next_token()
else:
tokens_found_by_parts[current_part].append(token)
self.next_token(False)
self.parser_input.next_token(False)
token = self.get_token()
token = self.parser_input.token
return first_token, tokens_found_by_parts
def get_concept_name(self, first_token, tokens_found_by_parts):
name_first_token_index = 1
token = self.get_token()
token = self.parser_input.token
if first_token.value != Keywords.CONCEPT:
self.add_error(UnexpectedTokenErrorNode([token], "Syntax error.", [Keywords.CONCEPT]))
name_first_token_index = 0
@@ -431,10 +403,11 @@ class DefaultParser(BaseParser):
# ask the other parsers if they recognize the tokens
with self.context.push(self.name, desc=f"Parsing {keyword}") as sub_context:
parser_input = self.sheerka.services[SheerkaExecute.NAME].get_parser_input(None, tokens)
to_parse = self.sheerka.ret(
sub_context.who,
True,
self.sheerka.new(BuiltinConcepts.USER_INPUT, body=tokens))
self.sheerka.new(BuiltinConcepts.USER_INPUT, body=parser_input))
steps = [BuiltinConcepts.PARSING]
parsed = self.sheerka.execute(sub_context, to_parse, steps)
parsing_result = core.builtin_helpers.expect_one(sub_context, parsed)
+3 -4
View File
@@ -1,4 +1,5 @@
from core.builtin_concepts import BuiltinConcepts
from core.sheerka.services.SheerkaExecute import ParserInput
from parsers.BaseParser import BaseParser
@@ -10,12 +11,10 @@ class EmptyStringParser(BaseParser):
def __init__(self, **kwargs):
BaseParser.__init__(self, "EmptyString", 90)
def parse(self, context, parser_input):
def parse(self, context, parser_input: ParserInput):
sheerka = context.sheerka
if isinstance(parser_input, str) and parser_input.strip() == "" or \
isinstance(parser_input, list) and parser_input == [] or \
parser_input is None:
if parser_input.is_empty():
ret = sheerka.ret(self.name, True, sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
+13 -10
View File
@@ -1,11 +1,12 @@
import logging
import core.builtin_helpers
from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts
from core.concept import VARIABLE_PREFIX
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Keywords, TokenKind, LexerError
from core.utils import str_concept
from parsers.BaseParser import BaseParser
import core.builtin_helpers
class ExactConceptParser(BaseParser):
@@ -19,7 +20,7 @@ class ExactConceptParser(BaseParser):
BaseParser.__init__(self, "ExactConcept", 80)
self.max_word_size = max_word_size
def parse(self, context, parser_input):
def parse(self, context, parser_input: ParserInput):
"""
text can be string, but text can also be an list of tokens
:param context:
@@ -31,6 +32,7 @@ class ExactConceptParser(BaseParser):
sheerka = context.sheerka
try:
parser_input.reset()
words = self.get_words(parser_input)
except LexerError as e:
context.log(f"Error found in tokenizer {e}", self.name)
@@ -38,8 +40,8 @@ class ExactConceptParser(BaseParser):
if len(words) > (self.max_word_size or self.MAX_WORDS_SIZE):
context.log(f"Max words reached. Stopping.", self.name)
too_long = sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, body=parser_input)
body = sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input, reason=too_long)
too_long = sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, body=parser_input.as_text())
body = sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.as_text(), reason=too_long)
return sheerka.ret(self.name, False, body)
already_recognized = [] # keep track of the concepts founds
@@ -78,12 +80,13 @@ class ExactConceptParser(BaseParser):
already_recognized.append(concept)
by_name = sheerka.resolve(self.get_input_as_text(parser_input))
by_name = sheerka.resolve(parser_input.as_text())
core.builtin_helpers.set_is_evaluated(by_name)
recognized = self.merge_concepts(already_recognized, by_name)
if len(recognized) == 0:
ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=parser_input))
ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT,
body=parser_input.as_text()))
self.log_result(context, parser_input, ret)
return ret
else:
@@ -94,10 +97,10 @@ class ExactConceptParser(BaseParser):
self.log_multiple_results(context, parser_input, res)
return res
def get_words(self, text):
tokens = self.get_input_as_tokens(text)
@staticmethod
def get_words(parser_input):
res = []
for t in tokens:
for t in parser_input.as_tokens():
if t.type == TokenKind.EOF:
break
if t.type == TokenKind.NEWLINE or t.type == TokenKind.WHITESPACE:
@@ -173,6 +176,6 @@ class ExactConceptParser(BaseParser):
context.sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=parser_input if isinstance(parser_input, str) else self.get_text_from_tokens(parser_input),
source=parser_input.as_text(),
body=concept,
try_parsed=concept))
+76 -73
View File
@@ -4,6 +4,7 @@ from dataclasses import dataclass
import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute
from core.tokenizer import LexerError, TokenKind
from parsers.BaseParser import BaseParser, Node, ErrorNode
from parsers.BnfNodeParser import ConceptNode
@@ -28,7 +29,7 @@ class PythonNode(Node):
self.concepts = concepts or {} # when concepts are recognized in the expression
# def __repr__(self):
# return "PythonNode(source='" + self.source + "', ast=" + self.get_dump(self.ast_) + ")"
# return "PythonNode(parser_input='" + self.parser_input + "', ast=" + self.get_dump(self.ast_) + ")"
def __repr__(self):
ast_type = "expr" if isinstance(self.ast_, ast.Expression) else "module"
@@ -57,77 +58,6 @@ class PythonNode(Node):
return dump
class PythonParser(BaseParser):
    """
    Parser for Python source code.

    The input is compiled with ast.parse, first as a single expression and,
    when that fails, as a sequence of statements.
    """

    def __init__(self, **kwargs):
        BaseParser.__init__(self, "Python", 50)
        # used to build the file name ("<source>") given to ast.parse
        self.source = kwargs.get("source", "<undef>")

    def parse(self, context, parser_input):
        """
        Try to parse parser_input as Python code.

        :param context: execution context, context.sheerka is used to build the result
        :param parser_input: a string, or any other input accepted by get_input_as_text
        :return: the value built by sheerka.ret: a successful PARSER_RESULT holding
                 a PythonNode, or a failed NOT_FOR_ME holding the collected errors
        """
        sheerka = context.sheerka
        tree = None

        # concept tokens are replaced by their encoded form in the Python source
        concept_encoder = {
            TokenKind.CONCEPT: lambda t: core.utils.encode_concept(t.value)
        }

        try:
            tracker = {}
            source = self.get_input_as_text(parser_input, concept_encoder, tracker).strip()
            if not isinstance(parser_input, str):
                # from now on, only the textual form of the input is reported
                parser_input = source

            # an expression is tried first, then a statement
            parsed, tree, error = self.try_parse_expression(source)
            if not parsed:
                parsed, tree, error = self.try_parse_statement(source)
            if not parsed:
                self.error_sink.append(PythonErrorNode(parser_input, error))

        except LexerError as e:
            self.error_sink.append(e)

        if self.has_error:
            ret = sheerka.ret(
                self.name,
                False,
                sheerka.new(
                    BuiltinConcepts.NOT_FOR_ME,
                    body=parser_input,
                    reason=self.error_sink))
        else:
            ret = sheerka.ret(
                self.name,
                True,
                sheerka.new(
                    BuiltinConcepts.PARSER_RESULT,
                    parser=self,
                    source=parser_input,
                    body=PythonNode(parser_input, tree, tracker),
                    try_parsed=None))

        self.log_result(context, parser_input, ret)
        return ret

    def try_parse_expression(self, text):
        """
        Compile text as a single expression ('eval' mode).

        :param text: Python source code
        :return: (True, tree, None) on success, (False, None, error) otherwise
        """
        try:
            tree = ast.parse(text, f"<{self.source}>", 'eval')
        except Exception as error:
            return False, None, error
        else:
            return True, tree, None

    def try_parse_statement(self, text):
        """
        Compile text as a sequence of statements ('exec' mode).

        :param text: Python source code
        :return: (True, tree, None) on success, (False, None, error) otherwise
        """
        try:
            tree = ast.parse(text, f"<{self.source}>", 'exec')
        except Exception as error:
            return False, None, error
        else:
            return True, tree, None
class PythonGetNamesVisitor(ast.NodeVisitor):
"""
This visitor will find all the name declared in the ast
@@ -206,7 +136,8 @@ class LexerNodeParserHelperForPython:
with context.push(self, desc="Trying Python for '" + to_parse + "'") as sub_context:
sub_context.add_inputs(to_parse=to_parse)
python_parser = PythonParser()
result = python_parser.parse(sub_context, to_parse)
parser_input = context.sheerka.services[SheerkaExecute.NAME].get_parser_input(to_parse)
result = python_parser.parse(sub_context, parser_input)
sub_context.add_values(return_values=result)
if result.status:
@@ -216,3 +147,75 @@ class LexerNodeParserHelperForPython:
return python_node
return result.body # the error
class PythonParser(BaseParser):
    """
    Parse Python scripts.

    The source is compiled with ast.parse, first in 'eval' mode (a single
    expression), then in 'exec' mode (statements) when the first try fails.
    """

    def __init__(self, **kwargs):
        BaseParser.__init__(self, "Python", 50)
        # used to build the file name ("<source>") given to ast.parse
        self.source = kwargs.get("source", "<undef>")

    def parse(self, context, parser_input: ParserInput):
        """
        Try to parse the content of parser_input as Python code.

        :param context: execution context, context.sheerka is used to build the result
        :param parser_input: the input to parse, it is reset before being read
        :return: the value built by sheerka.ret: a successful PARSER_RESULT holding
                 a PythonNode, or a failed NOT_FOR_ME holding the collected errors
        """
        sheerka = context.sheerka
        tracker = {}  # to keep track of concept tokens (c:xxx:)
        tree = None

        # concept tokens are replaced by their encoded form in the Python source
        concept_encoder = {
            TokenKind.CONCEPT: lambda t: core.utils.encode_concept(t.value)
        }

        try:
            parser_input.reset()
            source_code = parser_input.as_text(concept_encoder, tracker).strip()

            # an expression is tried first, then a statement
            parsed, tree, error = self.try_parse_expression(source_code)
            if not parsed:
                parsed, tree, error = self.try_parse_statement(source_code)
            if not parsed:
                self.error_sink.append(PythonErrorNode(parser_input.as_text(), error))

        except LexerError as e:
            self.error_sink.append(e)

        if self.has_error:
            ret = sheerka.ret(
                self.name,
                False,
                sheerka.new(
                    BuiltinConcepts.NOT_FOR_ME,
                    body=parser_input.as_text(),
                    reason=self.error_sink))
        else:
            ret = sheerka.ret(
                self.name,
                True,
                sheerka.new(
                    BuiltinConcepts.PARSER_RESULT,
                    parser=self,
                    source=parser_input.as_text(),
                    body=PythonNode(source_code, tree, tracker),
                    try_parsed=None))

        self.log_result(context, parser_input.as_text(), ret)
        return ret

    def _try_parse(self, text, mode):
        """
        Compile text with ast.parse using the given mode.

        :param text: Python source code
        :param mode: compilation mode given to ast.parse ('eval' or 'exec')
        :return: (True, tree, None) on success, (False, None, error) otherwise
        """
        try:
            return True, ast.parse(text, f"<{self.source}>", mode), None
        except Exception as error:
            return False, None, error

    def try_parse_expression(self, text):
        """
        Compile text as a single expression.

        :param text: Python source code
        :return: (True, tree, None) on success, (False, None, error) otherwise
        """
        return self._try_parse(text, 'eval')

    def try_parse_statement(self, text):
        """
        Compile text as a sequence of statements.

        :param text: Python source code
        :return: (True, tree, None) on success, (False, None, error) otherwise
        """
        return self._try_parse(text, 'exec')
+3 -1
View File
@@ -1,4 +1,5 @@
from core.builtin_concepts import BuiltinConcepts
from core.sheerka.services.SheerkaExecute import SheerkaExecute
from parsers.BaseParser import BaseParser
from parsers.BnfNodeParser import ConceptNode
from parsers.PythonParser import PythonParser
@@ -77,8 +78,9 @@ class PythonWithConceptsParser(BaseParser):
to_parse += node.source
with context.push(self, "Trying Python for '" + to_parse + "'") as sub_context:
parser_input = context.sheerka.services[SheerkaExecute.NAME].get_parser_input(to_parse)
python_parser = PythonParser()
result = python_parser.parse(sub_context, to_parse)
result = python_parser.parse(sub_context, parser_input)
if result.status:
python_node = result.body.body
+36 -41
View File
@@ -6,7 +6,7 @@ from typing import List
from core import builtin_helpers
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, DEFINITION_TYPE_BNF
from core.sheerka.ExecutionContext import ExecutionContext
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Token, TokenKind, Tokenizer
from parsers.BaseNodeParser import UnrecognizedTokensNode, ConceptNode, SourceCodeNode, SyaAssociativity, \
SourceCodeWithConceptNode, BaseNodeParser
@@ -313,21 +313,6 @@ class InFixToPostFix:
"""
return len(self.stack) > 0 and isinstance(self.stack[-1], type)
def _get_lexer_nodes_from_unrecognized(self):
    """
    Use the source of self.unrecognized_tokens to find concepts or source code
    :return: None when only_parsers_results reports a failure,
             otherwise the value returned by builtin_helpers.get_lexer_nodes
    """
    parsed = builtin_helpers.parse_unrecognized(
        self.context,
        self.unrecognized_tokens.source,
        PARSERS)
    parsers_only = builtin_helpers.only_parsers_results(self.context, parsed)

    if parsers_only.status:
        # the nodes are built from the start position and the tokens of the unrecognized part
        return builtin_helpers.get_lexer_nodes(
            parsers_only.body.body,
            self.unrecognized_tokens.start,
            self.unrecognized_tokens.tokens)

    return None
def _make_source_code_with_concept(self, start, rpar_token, end):
"""
@@ -440,7 +425,10 @@ class InFixToPostFix:
self.unrecognized_tokens.fix_source()
# try to recognize concepts
nodes_sequences = self._get_lexer_nodes_from_unrecognized()
nodes_sequences = builtin_helpers.get_lexer_nodes_from_unrecognized(
self.context,
self.unrecognized_tokens,
PARSERS)
if nodes_sequences:
# There are more than one solution found
@@ -482,7 +470,10 @@ class InFixToPostFix:
:return: list of function_parser_res
"""
self.unrecognized_tokens.fix_source()
nodes_sequences = self._get_lexer_nodes_from_unrecognized()
nodes_sequences = builtin_helpers.get_lexer_nodes_from_unrecognized(
self.context,
self.unrecognized_tokens,
PARSERS)
if nodes_sequences is None:
return None
@@ -908,13 +899,13 @@ class SyaNodeParser(BaseNodeParser):
self.concepts_by_first_keyword = {}
self.sya_definitions = {}
self.token = None
self.pos = -1
self.tokens = None
self.context: ExecutionContext = None
self.text = None
self.sheerka = None
# self.token = None
# self.pos = -1
# self.tokens = None
#
# self.context: ExecutionContext = None
# self.text = None
# self.sheerka = None
def init_from_concepts(self, context, concepts, **kwargs):
super().init_from_concepts(context, concepts)
@@ -954,15 +945,15 @@ class SyaNodeParser(BaseNodeParser):
return sya_concept_def
def infix_to_postfix(self, context, text):
def infix_to_postfix(self, context, parser_input: ParserInput):
"""
Implementing Shunting Yard Algorithm
:param context:
:param text:
:param parser_input:
:return:
"""
if not self.reset_parser(context, text):
if not self.reset_parser(context, parser_input):
return None
forked = []
@@ -978,32 +969,32 @@ class SyaNodeParser(BaseNodeParser):
forked.clear()
res = [InFixToPostFix(context)]
while self.next_token(False):
while self.parser_input.next_token(False):
for infix_to_postfix in res:
infix_to_postfix.reset()
token = self.get_token()
token = self.parser_input.token
try:
if token.type in (TokenKind.LPAR, TokenKind.RPAR):
# little optim, no need to lock, unlock or get the concept when parenthesis
for infix_to_postfix in res:
infix_to_postfix.eat_token(token, self.pos)
infix_to_postfix.eat_token(token, self.parser_input.pos)
continue
for infix_to_postfix in res:
if infix_to_postfix.eat_token(token, self.pos):
if infix_to_postfix.eat_token(token, self.parser_input.pos):
infix_to_postfix.lock()
concepts = self.get_concepts(token, self._is_eligible, to_map=self._get_sya_concept_def)
if not concepts:
for infix_to_postfix in res:
infix_to_postfix.eat_unrecognized(token, self.pos)
infix_to_postfix.eat_unrecognized(token, self.parser_input.pos)
continue
if len(concepts) == 1:
for infix_to_postfix in res:
infix_to_postfix.eat_concept(concepts[0], token, self.pos)
infix_to_postfix.eat_concept(concepts[0], token, self.parser_input.pos)
continue
# make the cartesian product
@@ -1012,7 +1003,7 @@ class SyaNodeParser(BaseNodeParser):
for concept in concepts:
clone = infix_to_postfix.clone()
temp_res.append(clone)
clone.eat_concept(concept, token, self.pos)
clone.eat_concept(concept, token, self.parser_input.pos)
res = temp_res
finally:
@@ -1036,14 +1027,15 @@ class SyaNodeParser(BaseNodeParser):
while len(item.nodes) > 0:
res = self.postfix_to_item(sheerka, item.nodes)
if isinstance(res, PostFixToItem):
items.append(ConceptNode(res.concept, res.start, res.end, self.tokens[res.start: res.end + 1]))
items.append(
ConceptNode(res.concept, res.start, res.end, self.parser_input.tokens[res.start: res.end + 1]))
else:
items.append(res)
item.has_unrecognized |= hasattr(res, "has_unrecognized") and res.has_unrecognized or \
isinstance(res, UnrecognizedTokensNode)
item.nodes = items
item.fix_all_pos()
item.tokens = self.tokens[item.start:item.end + 1]
item.tokens = self.parser_input.tokens[item.start:item.end + 1]
item.fix_source(True)
return item
@@ -1069,14 +1061,14 @@ class SyaNodeParser(BaseNodeParser):
return PostFixToItem(concept, start, end, has_unrecognized)
def parse(self, context, parser_input):
def parse(self, context, parser_input: ParserInput):
"""
:param context:
:param parser_input:
:return:
"""
if parser_input == "":
if parser_input.is_empty():
return context.sheerka.ret(
self.name,
False,
@@ -1096,7 +1088,7 @@ class SyaNodeParser(BaseNodeParser):
return self.sheerka.ret(
self.name,
False,
context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input))
context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.as_text()))
for infix_to_postfix in valid_infix_to_postfixs:
sequence = []
@@ -1106,7 +1098,10 @@ class SyaNodeParser(BaseNodeParser):
has_unrecognized |= hasattr(item, "has_unrecognized") and item.has_unrecognized or \
isinstance(item, UnrecognizedTokensNode)
if isinstance(item, PostFixToItem):
to_insert = ConceptNode(item.concept, item.start, item.end, self.tokens[item.start: item.end + 1])
to_insert = ConceptNode(item.concept,
item.start,
item.end,
self.parser_input.tokens[item.start: item.end + 1])
else:
to_insert = item
sequence.insert(0, to_insert)
+1 -1
View File
@@ -109,7 +109,7 @@ class UnrecognizedNodeParser(BaseParser):
_validate_concept(value)
elif isinstance(value, UnrecognizedTokensNode):
res = parse_unrecognized(context, value.tokens, PARSERS)
res = parse_unrecognized(context, value.source, PARSERS)
res = only_successful(context, res) # only key successful parsers
if res.status:
concept.compiled[name] = res.body.body
+2
View File
@@ -3,6 +3,7 @@ from logging import Logger
import core.utils
from core.concept import Concept
from core.sheerka.services.SheerkaExecute import ParserInput
from sheerkapickle import utils, tags, handlers
@@ -37,6 +38,7 @@ class SheerkaPickler:
from parsers.BaseParser import BaseParser
from evaluators.BaseEvaluator import BaseEvaluator
self.to_reduce.append(ToReduce(lambda o: isinstance(o, (BaseParser, BaseEvaluator)), lambda o: o.name))
self.to_reduce.append(ToReduce(lambda o: isinstance(o, ParserInput), lambda o: o.as_text()))
def flatten(self, obj):
if utils.is_primitive(obj):
+4 -1
View File
@@ -1,5 +1,6 @@
from core.builtin_concepts import UserInputConcept, ReturnValueConcept, BuiltinConcepts
from core.sheerka.Sheerka import Sheerka
from core.sheerka.services.SheerkaExecute import ParserInput
from evaluators.BaseEvaluator import BaseEvaluator
from parsers.BaseParser import BaseParser
from sheerkapickle.handlers import BaseHandler, registry
@@ -79,7 +80,9 @@ class UserInputHandler(ConceptHandler):
def flatten(self, obj: UserInputConcept, data):
    """
    Fill data with the state of a UserInputConcept.

    :param obj: the user input concept to flatten
    :param data: dictionary to fill, it is modified in place
    :return: data, with the concept id, the user name and the text set
    """
    data[CONCEPT_ID] = (obj.key, obj.id)
    data["user_name"] = obj.user_name

    # The text is computed only once: a list of tokens and a ParserInput are
    # converted to their textual form, any other value is stored unchanged.
    text = obj.text
    if isinstance(text, list):
        text = BaseParser.get_text_from_tokens(text)
    elif isinstance(text, ParserInput):
        text = text.as_text()
    data["text"] = text

    return data
def new(self, data):