Introduced ParserInput
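
This commit threads a single ParserInput object through every parser, replacing the loose text / tokens / token / pos state that each parser previously kept for itself. The class itself lives in core/sheerka/services/SheerkaExecute.py and is not part of this diff. The sketch below is a hypothetical reconstruction of its contract, inferred only from the call sites visible in the diff (reset(), next_token(), token, pos, tokens, is_empty(), as_text(), as_tokens(), from_tokens); a toy tokenizer stands in for core.tokenizer.Tokenizer so the sketch runs on its own.

    from dataclasses import dataclass
    from typing import List, Optional

    # Toy stand-ins for core.tokenizer, only so this sketch is self-contained.
    EOF, WORD, WS = "EOF", "WORD", "WHITESPACE"

    @dataclass
    class Token:
        type: str
        value: str

    def toy_tokenize(text: str) -> List[Token]:
        # The real code uses core.tokenizer.Tokenizer, which can raise LexerError.
        out: List[Token] = []
        for part in text.split(" "):
            if part:
                out.append(Token(WORD, part))
            out.append(Token(WS, " "))
        out[-1:] = [Token(EOF, "")]  # replace the trailing whitespace with EOF
        return out

    class ParserInput:
        """Hypothetical sketch of the interface the parsers below rely on."""

        def __init__(self, text: Optional[str], tokens: Optional[List[Token]] = None):
            self.text = text
            self.from_tokens = tokens is not None  # DefaultParser rejects token input
            self.tokens = tokens
            self.token: Optional[Token] = None  # current token
            self.pos = -1                       # index of the current token

        def reset(self, skip_to_first: bool = True):
            # (re)tokenize if needed and rewind the shared cursor
            if self.tokens is None:
                self.tokens = toy_tokenize(self.text or "")
            self.token, self.pos = None, -1
            if skip_to_first:
                self.next_token()

        def next_token(self, skip_whitespace: bool = True) -> bool:
            # advance the cursor; returns False once EOF is reached
            if self.token and self.token.type == EOF:
                return False
            self.pos += 1
            self.token = self.tokens[self.pos]
            while skip_whitespace and self.token.type == WS:
                self.pos += 1
                self.token = self.tokens[self.pos]
            return self.token.type != EOF

        def is_empty(self) -> bool:
            # replaces the old checks for "", [] and None scattered across parsers
            return not self.text and not self.tokens

        def as_text(self, switcher=None, tracker=None) -> str:
            # the real method can map tokens through a switcher (see PythonParser);
            # this toy version ignores those arguments
            if self.text is not None:
                return self.text
            return "".join(t.value for t in self.tokens or [])

        def as_tokens(self) -> List[Token]:
            if self.tokens is None:
                self.reset(False)
            return self.tokens

Call sites do not construct this directly: they go through sheerka.services[SheerkaExecute.NAME].get_parser_input(...), which lets the service hand out, and per the commented-out pi_cache sketch in the BaseParser module, eventually cache, one tokenization per input instead of re-lexing in every parser.
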
@@ -3,6 +3,7 @@ from dataclasses import dataclass
 from core import builtin_helpers
 from core.builtin_concepts import BuiltinConcepts
 from core.concept import DEFINITION_TYPE_BNF, Concept
+from core.sheerka.services.SheerkaExecute import ParserInput
 from core.tokenizer import Tokenizer
 from core.utils import strip_tokens
 from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensNode, SourceCodeNode
@@ -250,26 +251,27 @@ class AtomNodeParser(BaseNodeParser):

         concept_parser_helpers = [AtomConceptParserHelper(self.context)]

-        while self.next_token(False):
+        while self.parser_input.next_token(False):
             for concept_parser in concept_parser_helpers:
                 concept_parser.reset()

-            token = self.token
+            token = self.parser_input.token
+            pos = self.parser_input.pos

             try:
                 for concept_parser in concept_parser_helpers:
-                    if concept_parser.eat_token(self.token, self.pos):
+                    if concept_parser.eat_token(token, pos):
                         concept_parser.lock()

                 concepts = self.get_concepts(token, self._is_eligible, custom=_get_concepts_by_name)
                 if not concepts:
                     for concept_parser in concept_parser_helpers:
-                        concept_parser.eat_unrecognized(token, self.pos)
+                        concept_parser.eat_unrecognized(token, pos)
                     continue

                 if len(concepts) == 1:
                     for concept_parser in concept_parser_helpers:
-                        concept_parser.eat_concept(concepts[0], self.pos)
+                        concept_parser.eat_concept(concepts[0], pos)
                     continue

                 # make the cartesian product
@@ -284,7 +286,7 @@ class AtomNodeParser(BaseNodeParser):
                     for concept in concepts:
                         clone = concept_parser.clone()
                         temp_res.append(clone)
-                        clone.eat_concept(concept, self.pos)
+                        clone.eat_concept(concept, pos)

                 concept_parser_helpers = temp_res
             finally:
@@ -298,22 +300,26 @@ class AtomNodeParser(BaseNodeParser):

         return concept_parser_helpers

-    def get_by_name(self, parser_input):
+    def get_by_name(self):
         """
         Try to recognize the full parser input as a concept name
         :return:
         """
-        source = self.get_input_as_text(parser_input)
+        source = self.parser_input.as_text()
         concepts = self.sheerka.get_by_name(source.strip())
         if not self.sheerka.is_known(concepts):
             return None

         concepts = [concepts] if isinstance(concepts, Concept) else concepts
         res = []
-        start, end = self.get_tokens_boundaries(self.tokens)
+        start, end = self.get_tokens_boundaries(self.parser_input.as_tokens())
         for concept in concepts:
             parser_helper = AtomConceptParserHelper(None)
-            parser_helper.sequence.append(ConceptNode(concept, start, end, strip_tokens(self.tokens, True), source))
+            parser_helper.sequence.append(ConceptNode(
+                concept,
+                start,
+                end,
+                strip_tokens(self.parser_input.as_tokens(), True), source))
             res.append(parser_helper)

         return res
@@ -331,7 +337,7 @@ class AtomNodeParser(BaseNodeParser):
             if isinstance(node, ConceptNode):
                 if len(node.concept.metadata.variables) > 0:
                     node.concept.metadata.is_evaluated = True  # Do not try to evaluate those concepts
-                node.tokens = self.tokens[node.start:node.end + 1]
+                node.tokens = self.parser_input.tokens[node.start:node.end + 1]
                 node.fix_source()

             if parser_helper in valid_parser_helpers:
@@ -341,8 +347,8 @@ class AtomNodeParser(BaseNodeParser):

         return valid_parser_helpers

-    def parse(self, context, parser_input):
-        if parser_input == "":
+    def parse(self, context, parser_input: ParserInput):
+        if parser_input.is_empty():
             return context.sheerka.ret(
                 self.name,
                 False,
@@ -356,7 +362,7 @@ class AtomNodeParser(BaseNodeParser):
                 context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))

         sequences = self.get_concepts_sequences()
-        if by_name := self.get_by_name(parser_input):
+        if by_name := self.get_by_name():
             sequences.extend(by_name)

         parser_helpers = self.get_valid(sequences)
@@ -386,4 +392,4 @@ class AtomNodeParser(BaseNodeParser):
         return self.sheerka.ret(
             self.name,
             False,
-            context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input))
+            context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.as_text()))

@@ -5,7 +5,7 @@ from enum import Enum
 import core.utils
 from core.builtin_concepts import BuiltinConcepts
 from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF, ConceptParts
-from core.sheerka.ExecutionContext import ExecutionContext
+from core.sheerka.services.SheerkaExecute import ParserInput
 from core.tokenizer import TokenKind, LexerError, Token, Keywords
 from parsers.BaseParser import Node, BaseParser, ErrorNode

@@ -86,7 +86,6 @@ class UnrecognizedTokensNode(LexerNode):
         else:
             self.end -= 1

-
     def has_open_paren(self):
         return self.parenthesis_count > 0

@@ -598,13 +597,13 @@ class BaseNodeParser(BaseParser):
         else:
             self.concepts_by_first_keyword = None

-        self.token = None
-        self.pos = -1
-        self.tokens = None
-
-        self.context: ExecutionContext = None
-        self.text = None
-        self.sheerka = None
+        # self.token = None
+        # self.pos = -1
+        # self.tokens = None
+        #
+        # self.context: ExecutionContext = None
+        # self.text = None
+        # self.sheerka = None

     def init_from_concepts(self, context, concepts, **kwargs):
         """
@@ -617,43 +616,48 @@ class BaseNodeParser(BaseParser):
         concepts_by_first_keyword = self.get_concepts_by_first_keyword(context, concepts).body
         self.concepts_by_first_keyword = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword).body

-    def reset_parser(self, context, text):
+    def reset_parser(self, context, parser_input: ParserInput):
         self.context = context
         self.sheerka = context.sheerka
-        self.text = text

+        self.parser_input = parser_input
         try:
-            self.tokens = list(self.get_input_as_tokens(text))
+            self.parser_input.reset(False)
         except LexerError as e:
             self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
             return False

-        self.token = None
-        self.pos = -1
         return True
+        # self.text = text
+        #
+        # try:
+        #     self.tokens = list(self.get_input_as_tokens(text))
+        #
+        #
+        # self.token = None
+        # self.pos = -1
+        # return True

-    def add_error(self, error, next_token=True):
-        self.error_sink.append(error)
-        if next_token:
-            self.next_token()
-        return error
+    # def add_error(self, error, next_token=True):
+    #     self.error_sink.append(error)
+    #     if next_token:
+    #         self.parser_input.next_token()
+    #     return error

-    def get_token(self) -> Token:
-        return self.token
-
-    def next_token(self, skip_whitespace=True):
-        if self.token and self.token.type == TokenKind.EOF:
-            return False
-
-        self.pos += 1
-        self.token = self.tokens[self.pos]
-
-        if skip_whitespace:
-            while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
-                self.pos += 1
-                self.token = self.tokens[self.pos]
-
-        return self.token.type != TokenKind.EOF
+    # def get_token(self) -> Token:
+    #     return self.token
+    #
+    # def next_token(self, skip_whitespace=True):
+    #     if self.token and self.token.type == TokenKind.EOF:
+    #         return False
+    #
+    #     self.pos += 1
+    #     self.token = self.tokens[self.pos]
+    #
+    #     if skip_whitespace:
+    #         while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
+    #             self.pos += 1
+    #             self.token = self.tokens[self.pos]
+    #
+    #     return self.token.type != TokenKind.EOF

     def get_concepts(self, token, to_keep, custom=None, to_map=None, strip_quotes=False):
         """

@@ -9,6 +9,25 @@ from core.sheerka_logger import get_logger
 from core.tokenizer import TokenKind, Keywords, Token, Tokenizer


+# # keep a cache for the parser input
+# pi_cache = Cache(default=lambda key: ParserInput(key), max_size=20)
+#
+#
+# def get_parser_input(text, tokens=None, length=None):
+#     """
+#     Returns new or existing parser input
+#     :param text:
+#     :param tokens:
+#     :param length:
+#     :return:
+#     """
+#     if tokens is None or pi_cache.has(text):
+#         return pi_cache.get(text)
+#     pi = ParserInput(text, tokens, length)
+#     pi_cache.put(text, pi)
+#     return pi
+
+
 @dataclass()
 class Node:
     pass
@@ -84,6 +103,9 @@ class BaseParser:
         self.enabled = enabled

         self.error_sink = []
+        self.context: ExecutionContext = None
+        self.sheerka = None
+        self.parser_input: ParserInput = None

     def __eq__(self, other):
         if not isinstance(other, self.__class__):
@@ -99,6 +121,12 @@ class BaseParser:
     def parse(self, context, parser_input):
         pass

+    def add_error(self, error, next_token=True):
+        self.error_sink.append(error)
+        if next_token:
+            self.parser_input.next_token()
+        return error
+
     @property
     def has_error(self):
         return len(self.error_sink) > 0

@@ -14,6 +14,7 @@ from cache.Cache import Cache
 from core import builtin_helpers
 from core.builtin_concepts import BuiltinConcepts
 from core.concept import Concept, DEFINITION_TYPE_BNF, DoNotResolve, ConceptParts
+from core.sheerka.services.SheerkaExecute import ParserInput
 from core.tokenizer import Tokenizer, Token, TokenKind
 from parsers.BaseNodeParser import BaseNodeParser, LexerNode, UnrecognizedTokensNode, ConceptNode, GrammarErrorNode
 from parsers.BaseParser import ErrorNode
@@ -149,7 +150,7 @@ class ConceptExpression(ParsingExpression):
         return NonTerminalNode(self,
                                node.start,
                                node.end,
-                               parser_helper.parser.tokens[node.start: node.end + 1],
+                               parser_helper.parser.parser_input.tokens[node.start: node.end + 1],
                                [node])


@@ -184,7 +185,11 @@ class Sequence(ParsingExpression):
             children.append(node)
             end_pos = node.end

-        return NonTerminalNode(self, init_pos, end_pos, parser_helper.parser.tokens[init_pos: end_pos + 1], children)
+        return NonTerminalNode(self,
+                               init_pos,
+                               end_pos,
+                               parser_helper.parser.parser_input.tokens[init_pos: end_pos + 1],
+                               children)

     def __repr__(self):
         to_str = ", ".join(repr(n) for n in self.elements)
@@ -206,7 +211,7 @@ class OrderedChoice(ParsingExpression):
                 return NonTerminalNode(self,
                                        init_pos,
                                        node.end,
-                                       parser_helper.parser.tokens[init_pos: node.end + 1],
+                                       parser_helper.parser.parser_input.tokens[init_pos: node.end + 1],
                                        [node])

             parser_helper.seek(init_pos)  # backtrack
@@ -237,7 +242,7 @@ class Optional(ParsingExpression):
                 self,
                 node.start,
                 node.end,
-                parser_helper.parser.tokens[node.start: node.end + 1],
+                parser_helper.parser.parser_input.tokens[node.start: node.end + 1],
                 [node])

         parser_helper.seek(init_pos)  # backtrack
@@ -303,7 +308,8 @@ class ZeroOrMore(Repetition):
         if len(children) == 0:
             return NonTerminalNode(self, init_pos, -1, [], [])

-        return NonTerminalNode(self, init_pos, end_pos, parser_helper.parser.tokens[init_pos: end_pos + 1], children)
+        return NonTerminalNode(self, init_pos, end_pos, parser_helper.parser.parser_input.tokens[init_pos: end_pos + 1],
+                               children)

     def __repr__(self):
         to_str = ", ".join(repr(n) for n in self.elements)
@@ -343,7 +349,11 @@ class OneOrMore(Repetition):
         if len(children) == 0:  # if nothing is found, it's an error
             return None

-        return NonTerminalNode(self, init_pos, end_pos, parser_helper.parser.tokens[init_pos: end_pos + 1], children)
+        return NonTerminalNode(self,
+                               init_pos,
+                               end_pos,
+                               parser_helper.parser.parser_input.tokens[init_pos: end_pos + 1],
+                               children)

     def __repr__(self):
         to_str = ", ".join(repr(n) for n in self.elements)
@@ -507,24 +517,24 @@ class BnfConceptParserHelper:
             return False

         self.pos += 1
-        self.token = self.parser.tokens[self.pos]
+        self.token = self.parser.parser_input.tokens[self.pos]

         if skip_whitespace:
             while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
                 self.pos += 1
-                self.token = self.parser.tokens[self.pos]
+                self.token = self.parser.parser_input.tokens[self.pos]

         return self.token.type != TokenKind.EOF

     def seek(self, pos):
         self.pos = pos
-        self.token = self.parser.tokens[self.pos]
+        self.token = self.parser.parser_input.tokens[self.pos]

     def has_error(self):
         return len(self.errors) > 0

     def is_locked(self):
-        return self.parser.pos <= self.pos or self.has_error()
+        return self.parser.parser_input.pos <= self.pos or self.has_error()

     def eat_concept(self, concept, token):
         if self.is_locked():
@@ -546,8 +556,8 @@ class BnfConceptParserHelper:
             self.errors.append(GrammarErrorNode(error_msg))
             return

-        self.pos = self.parser.pos
-        self.token = self.parser.tokens[self.pos]
+        self.pos = self.parser.parser_input.pos
+        self.token = self.parser.parser_input.tokens[self.pos]

         # parse
         node = parsing_expression.parse(self)
@@ -557,15 +567,15 @@ class BnfConceptParserHelper:
             self.bnf_parsed = True
         else:
             self.debug.append(("Rewind", token))
-            self.unrecognized_tokens.add_token(token, self.parser.pos)
-            self.pos = self.parser.pos  # reset position
+            self.unrecognized_tokens.add_token(token, self.parser.parser_input.pos)
+            self.pos = self.parser.parser_input.pos  # reset position

     def eat_unrecognized(self, token):
         if self.is_locked():
             return

         self.debug.append(token)
-        self.unrecognized_tokens.add_token(token, self.parser.pos)
+        self.unrecognized_tokens.add_token(token, self.parser.parser_input.pos)

     def manage_unrecognized(self):
         if self.unrecognized_tokens.is_empty():
@@ -631,7 +641,7 @@ class BnfConceptParserHelper:
             concept,
             underlying.start,
             underlying.end,
-            self.parser.tokens[underlying.start: underlying.end + 1],
+            self.parser.parser_input.tokens[underlying.start: underlying.end + 1],
             None,
             underlying)
         return concept_node
@@ -779,9 +789,9 @@ class BnfNodeParser(BaseNodeParser):

         concept_parser_helpers = [BnfConceptParserHelper(self)]

-        while self.next_token(False):
+        while self.parser_input.next_token(False):

-            token = self.get_token()
+            token = self.parser_input.token

             try:
                 concepts = self.get_concepts(token, self._is_eligible, strip_quotes=False)
@@ -837,7 +847,7 @@ class BnfNodeParser(BaseNodeParser):
                 resolved = self.resolve_parsing_expression(expression, already_seen or set())
                 sub_context.add_values(return_values=resolved)

-                self.concepts_grammars.put(concept.id, resolved)
+            self.concepts_grammars.put(concept.id, resolved)

         if self.has_error:
             return None
@@ -929,7 +939,7 @@ class BnfNodeParser(BaseNodeParser):
             return self.context.concepts[concept]
         return self.sheerka.get_by_key(concept)

-    def parse(self, context, parser_input):
+    def parse(self, context, parser_input: ParserInput):
         """
         parser_input can be a string, but it can also be a list of tokens
         :param context:
@@ -940,11 +950,11 @@ class BnfNodeParser(BaseNodeParser):
         context.log(f"Parsing '{parser_input}' with BnfNode", self.name)
         sheerka = context.sheerka

-        if parser_input == "" or isinstance(parser_input, list) and len(parser_input) == 0:
+        if parser_input.is_empty():
             return sheerka.ret(self.name,
                                False,
                                sheerka.new(BuiltinConcepts.NOT_FOR_ME,
-                                           body=parser_input,
+                                           body=parser_input.as_text(),
                                            reason=BuiltinConcepts.IS_EMPTY))

         if not self.reset_parser(context, parser_input):
@@ -966,7 +976,7 @@ class BnfNodeParser(BaseNodeParser):
             return self.sheerka.ret(
                 self.name,
                 False,
-                context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input))
+                context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.as_text()))

         ret = []
         for parser_helper in valid_parser_helpers:
@@ -977,13 +987,13 @@ class BnfNodeParser(BaseNodeParser):
                 self.sheerka.new(
                     BuiltinConcepts.PARSER_RESULT,
                     parser=self,
-                    source=parser_input,
+                    source=parser_input.as_text(),
                     body=parser_helper.sequence,
                     try_parsed=parser_helper.sequence)))

         if len(ret) == 1:
-            self.log_result(context, parser_input, ret[0])
+            self.log_result(context, parser_input.as_text(), ret[0])
             return ret[0]
         else:
-            self.log_multiple_results(context, parser_input, ret)
+            self.log_multiple_results(context, parser_input.as_text(), ret)
             return ret

@@ -115,7 +115,6 @@ class BnfParser(BaseParser):

     def parse(self, context: ExecutionContext, parser_input):
-
         tree = None
         try:
             self.reset_parser(context, parser_input)
             tree = self.parse_choice()
@@ -124,7 +123,10 @@ class BnfParser(BaseParser):
             if token and token.type != TokenKind.EOF:
                 self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", token, []))
         except LexerError as e:
             self.add_error(e, False)
-
+            return self.sheerka.ret(
+                self.name,
+                False,
+                context.sheerka.new(BuiltinConcepts.ERROR, body=[e]))

         value = self.get_return_value_body(context.sheerka, self.source, tree, tree)

@@ -283,4 +285,3 @@ class BnfParser(BaseParser):
         expression.rule_name = token.value
         self.next_token()
         return expression
-

@@ -1,12 +1,13 @@
-from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, ParserResultConcept
-from core.concept import ConceptParts, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF
-from dataclasses import dataclass, field

 import core.builtin_helpers
 import core.utils
+from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, ParserResultConcept
+from core.concept import ConceptParts, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF
+from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute
+from core.tokenizer import Tokenizer, TokenKind, Keywords
+from parsers.BaseParser import BaseParser, Node, ErrorNode, NotInitializedNode
-from core.tokenizer import Tokenizer, TokenKind, Token, Keywords
+from dataclasses import dataclass, field
 from parsers.BnfParser import BnfParser
 from core.sheerka.Sheerka import ExecutionContext


 @dataclass()
@@ -90,11 +91,10 @@ class DefConceptNode(DefaultParserNode):
         asts = {}
         for part_key in ConceptParts:
             prop_value = getattr(self, part_key.value)
-            if isinstance(prop_value, ReturnValueConcept) and isinstance(prop_value.body,
-                                                                         ParserResultConcept) and hasattr(
-                prop_value.body.body, "ast_"):
+            if isinstance(prop_value, ReturnValueConcept) and \
+                    isinstance(prop_value.body, ParserResultConcept) and \
+                    hasattr(prop_value.body.body, "ast_"):
                 asts[part_key] = prop_value
-                # asts[part_key] = prop_value.body.body.ast_
         return asts


@@ -111,11 +111,6 @@ class DefaultParser(BaseParser):

     def __init__(self, **kwargs):
         BaseParser.__init__(self, "Default", 60)
-        self.lexer_iter = None
-        self._current = None
-        self.context: ExecutionContext = None
-        self.text = None
-        self.sheerka = None

     @staticmethod
     def fix_indentation(tokens):
@@ -129,6 +124,7 @@ class DefaultParser(BaseParser):
         :param tokens:
         :return:
         """
+        tokens = tokens.copy()  # do not modify ParserInput.tokens
         if tokens[0].type != TokenKind.COLON:
             return tokens

@@ -143,6 +139,8 @@ class DefaultParser(BaseParser):
         indent_size = len(tokens[2].value)

         # now fix the other indentations
+        # KSI 23/05/2020 Not quite sure this 'fixing' stuff is still relevant,
+        # as I now have an editor in interactive mode
         i = 3
         while i < len(tokens) - 1:
             if tokens[i].type == TokenKind.NEWLINE:
@@ -152,44 +150,22 @@ class DefaultParser(BaseParser):
                 if len(tokens[i + 1].value) < indent_size:
                     return SyntaxErrorNode([tokens[i + 1]], "Invalid indentation.")

+                tokens[i + 1] = tokens[i + 1].clone()
                 tokens[i + 1].value = " " * (len(tokens[i + 1].value) - indent_size)
             i += 1

         return tokens[3:]

-    def reset_parser(self, context, text):
+    def reset_parser(self, context, parser_input):
         self.context = context
         self.sheerka = context.sheerka
+        self.parser_input = parser_input
+        self.parser_input.reset()
+        self.parser_input.next_token()

-        self.text = text
-        self.lexer_iter = iter(Tokenizer(text))
-        self._current = None
-
-        self.next_token()
-
-    def add_error(self, error, next_token=True):
-        self.error_sink.append(error)
-        if next_token:
-            self.next_token()
-        return error
-
-    def get_token(self) -> Token:
-        return self._current
-
-    def next_token(self, skip_whitespace=True):
-        try:
-            self._current = next(self.lexer_iter)
-            if skip_whitespace:
-                while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
-                    self._current = next(self.lexer_iter)
-        except StopIteration:
-            self._current = None
-
-        return
-
-    def parse(self, context, parser_input):
+    def parse(self, context, parser_input: ParserInput):
         # default parser can only manage string text
-        if not isinstance(parser_input, str):
+        if parser_input.from_tokens:
             ret = context.sheerka.ret(
                 self.name,
                 False,
@@ -197,12 +173,14 @@ class DefaultParser(BaseParser):
             self.log_result(context, parser_input, ret)
             return ret

         tree = None
         try:
             self.reset_parser(context, parser_input)
             tree = self.parse_statement()
         except core.tokenizer.LexerError as e:
             self.add_error(e, False)
+            return self.sheerka.ret(
+                self.name,
+                False,
+                context.sheerka.new(BuiltinConcepts.ERROR, body=[e]))

         # If an error is found it must be sent to error_sink
         # tree must contain what was recognized
@@ -210,26 +188,20 @@ class DefaultParser(BaseParser):
         if self.has_error and isinstance(self.error_sink[0], CannotHandleErrorNode):
             body = self.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=self.error_sink)
         else:
-            body = self.get_return_value_body(context.sheerka, parser_input, tree, tree)
-            # body = self.sheerka.new(
-            #     BuiltinConcepts.PARSER_RESULT,
-            #     parser=self,
-            #     source=text,
-            #     body=self.error_sink if self.has_error else tree,
-            #     try_parsed=tree)
+            body = self.get_return_value_body(context.sheerka, parser_input.as_text(), tree, tree)

         ret = self.sheerka.ret(
             self.name,
             not self.has_error,
             body)

-        self.log_result(context, parser_input, ret)
+        self.log_result(context, parser_input.as_text(), ret)
         return ret

     def parse_statement(self):
-        token = self.get_token()
+        token = self.parser_input.token
         if token.value == Keywords.DEF:
-            self.next_token()
+            self.parser_input.next_token()
             self.context.log("Keyword DEF found.", self.name)
             return self.parse_def_concept(token)
         else:
@@ -282,23 +254,23 @@ class DefaultParser(BaseParser):
             return concept_name

         keyword = []
-        token = self.get_token()
+        token = self.parser_input.token
         if token.value != Keywords.ISA:
             return self.add_error(CannotHandleErrorNode([token], ""))
         keyword.append(token)
-        self.next_token()
+        self.parser_input.next_token()

         set_name = self.parse_concept_name()
         return IsaConceptNode(keyword, concept_name, set_name)

     def parse_concept_name(self):
         tokens = []
-        token = self.get_token()
+        token = self.parser_input.token

         while not (token.type == TokenKind.EOF or token.type == TokenKind.KEYWORD):
             tokens.append(token)
-            self.next_token()
-            token = self.get_token()
+            self.parser_input.next_token()
+            token = self.parser_input.token

         if len(tokens) == 0:
             return self.add_error(UnexpectedTokenErrorNode([token], "Unexpected token", []))
@@ -319,7 +291,7 @@ class DefaultParser(BaseParser):
             Keywords.POST: None,
         }
         current_part = Keywords.CONCEPT
-        token = self.get_token()
+        token = self.parser_input.token
         first_token = token

         # loop through the tokens, and put them in the correct tokens_found_by_parts entry
@@ -334,18 +306,18 @@ class DefaultParser(BaseParser):
             else:
                 tokens_found_by_parts[keyword] = [token]
                 current_part = keyword
-                self.next_token()
+                self.parser_input.next_token()
             else:
                 tokens_found_by_parts[current_part].append(token)
-                self.next_token(False)
+                self.parser_input.next_token(False)

-            token = self.get_token()
+            token = self.parser_input.token

         return first_token, tokens_found_by_parts

     def get_concept_name(self, first_token, tokens_found_by_parts):
         name_first_token_index = 1
-        token = self.get_token()
+        token = self.parser_input.token
         if first_token.value != Keywords.CONCEPT:
             self.add_error(UnexpectedTokenErrorNode([token], "Syntax error.", [Keywords.CONCEPT]))
             name_first_token_index = 0
@@ -431,10 +403,11 @@ class DefaultParser(BaseParser):

         # ask the other parsers if they recognize the tokens
         with self.context.push(self.name, desc=f"Parsing {keyword}") as sub_context:
+            parser_input = self.sheerka.services[SheerkaExecute.NAME].get_parser_input(None, tokens)
             to_parse = self.sheerka.ret(
                 sub_context.who,
                 True,
-                self.sheerka.new(BuiltinConcepts.USER_INPUT, body=tokens))
+                self.sheerka.new(BuiltinConcepts.USER_INPUT, body=parser_input))
             steps = [BuiltinConcepts.PARSING]
             parsed = self.sheerka.execute(sub_context, to_parse, steps)
             parsing_result = core.builtin_helpers.expect_one(sub_context, parsed)

@@ -1,4 +1,5 @@
 from core.builtin_concepts import BuiltinConcepts
+from core.sheerka.services.SheerkaExecute import ParserInput
 from parsers.BaseParser import BaseParser


@@ -10,12 +11,10 @@ class EmptyStringParser(BaseParser):
     def __init__(self, **kwargs):
         BaseParser.__init__(self, "EmptyString", 90)

-    def parse(self, context, parser_input):
+    def parse(self, context, parser_input: ParserInput):
         sheerka = context.sheerka

-        if isinstance(parser_input, str) and parser_input.strip() == "" or \
-                isinstance(parser_input, list) and parser_input == [] or \
-                parser_input is None:
+        if parser_input.is_empty():
             ret = sheerka.ret(self.name, True, sheerka.new(
                 BuiltinConcepts.PARSER_RESULT,
                 parser=self,

@@ -1,11 +1,12 @@
 import logging

+import core.builtin_helpers
 from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts
 from core.concept import VARIABLE_PREFIX
+from core.sheerka.services.SheerkaExecute import ParserInput
 from core.tokenizer import Keywords, TokenKind, LexerError
 from core.utils import str_concept
 from parsers.BaseParser import BaseParser
-import core.builtin_helpers


 class ExactConceptParser(BaseParser):
@@ -19,7 +20,7 @@ class ExactConceptParser(BaseParser):
         BaseParser.__init__(self, "ExactConcept", 80)
         self.max_word_size = max_word_size

-    def parse(self, context, parser_input):
+    def parse(self, context, parser_input: ParserInput):
         """
         parser_input can be a string, but it can also be a list of tokens
         :param context:
@@ -31,6 +32,7 @@ class ExactConceptParser(BaseParser):
         sheerka = context.sheerka

         try:
+            parser_input.reset()
             words = self.get_words(parser_input)
         except LexerError as e:
             context.log(f"Error found in tokenizer {e}", self.name)
@@ -38,8 +40,8 @@ class ExactConceptParser(BaseParser):

         if len(words) > (self.max_word_size or self.MAX_WORDS_SIZE):
             context.log(f"Max words reached. Stopping.", self.name)
-            too_long = sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, body=parser_input)
-            body = sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input, reason=too_long)
+            too_long = sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, body=parser_input.as_text())
+            body = sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.as_text(), reason=too_long)
             return sheerka.ret(self.name, False, body)

         already_recognized = []  # keep track of the concepts found
@@ -78,12 +80,13 @@ class ExactConceptParser(BaseParser):

             already_recognized.append(concept)

-        by_name = sheerka.resolve(self.get_input_as_text(parser_input))
+        by_name = sheerka.resolve(parser_input.as_text())
         core.builtin_helpers.set_is_evaluated(by_name)
         recognized = self.merge_concepts(already_recognized, by_name)

         if len(recognized) == 0:
-            ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=parser_input))
+            ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT,
+                                                            body=parser_input.as_text()))
             self.log_result(context, parser_input, ret)
             return ret
         else:
@@ -94,10 +97,10 @@ class ExactConceptParser(BaseParser):
             self.log_multiple_results(context, parser_input, res)
             return res

-    def get_words(self, text):
-        tokens = self.get_input_as_tokens(text)
+    @staticmethod
+    def get_words(parser_input):
         res = []
-        for t in tokens:
+        for t in parser_input.as_tokens():
             if t.type == TokenKind.EOF:
                 break
             if t.type == TokenKind.NEWLINE or t.type == TokenKind.WHITESPACE:
@@ -173,6 +176,6 @@ class ExactConceptParser(BaseParser):
                 context.sheerka.new(
                     BuiltinConcepts.PARSER_RESULT,
                     parser=self,
-                    source=parser_input if isinstance(parser_input, str) else self.get_text_from_tokens(parser_input),
+                    source=parser_input.as_text(),
                     body=concept,
                     try_parsed=concept))

@@ -4,6 +4,7 @@ from dataclasses import dataclass

 import core.utils
 from core.builtin_concepts import BuiltinConcepts
+from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute
 from core.tokenizer import LexerError, TokenKind
 from parsers.BaseParser import BaseParser, Node, ErrorNode
 from parsers.BnfNodeParser import ConceptNode
@@ -28,7 +29,7 @@ class PythonNode(Node):
         self.concepts = concepts or {}  # when concepts are recognized in the expression

     # def __repr__(self):
-    #     return "PythonNode(source='" + self.source + "', ast=" + self.get_dump(self.ast_) + ")"
+    #     return "PythonNode(parser_input='" + self.parser_input + "', ast=" + self.get_dump(self.ast_) + ")"

     def __repr__(self):
         ast_type = "expr" if isinstance(self.ast_, ast.Expression) else "module"
@@ -57,77 +58,6 @@ class PythonNode(Node):
         return dump


-class PythonParser(BaseParser):
-    """
-    Parse Python scripts
-    """
-
-    def __init__(self, **kwargs):
-
-        BaseParser.__init__(self, "Python", 50)
-        self.source = kwargs.get("source", "<undef>")
-
-    def parse(self, context, parser_input):
-        sheerka = context.sheerka
-        tree = None
-
-        python_switcher = {
-            TokenKind.CONCEPT: lambda t: core.utils.encode_concept(t.value)
-        }
-
-        try:
-            tracker = {}
-            source = self.get_input_as_text(parser_input, python_switcher, tracker)
-            source = source.strip()
-            parser_input = parser_input if isinstance(parser_input, str) else source
-
-            # first, try to parse an expression
-            res, tree, error = self.try_parse_expression(source)
-            if not res:
-                # then try to parse a statement
-                res, tree, error = self.try_parse_statement(source)
-                if not res:
-                    error_node = PythonErrorNode(parser_input, error)
-                    self.error_sink.append(error_node)
-
-        except LexerError as e:
-            self.error_sink.append(e)
-
-        if self.has_error:
-            ret = sheerka.ret(
-                self.name,
-                False,
-                sheerka.new(
-                    BuiltinConcepts.NOT_FOR_ME,
-                    body=parser_input,
-                    reason=self.error_sink))
-        else:
-            ret = sheerka.ret(
-                self.name,
-                True,
-                sheerka.new(
-                    BuiltinConcepts.PARSER_RESULT,
-                    parser=self,
-                    source=parser_input,
-                    body=PythonNode(parser_input, tree, tracker),
-                    try_parsed=None))
-
-        self.log_result(context, parser_input, ret)
-        return ret
-
-    def try_parse_expression(self, text):
-        try:
-            return True, ast.parse(text, f"<{self.source}>", 'eval'), None
-        except Exception as error:
-            return False, None, error
-
-    def try_parse_statement(self, text):
-        try:
-            return True, ast.parse(text, f"<{self.source}>", 'exec'), None
-        except Exception as error:
-            return False, None, error
-
-
 class PythonGetNamesVisitor(ast.NodeVisitor):
     """
     This visitor will find all the names declared in the ast
@@ -206,7 +136,8 @@ class LexerNodeParserHelperForPython:
         with context.push(self, desc="Trying Python for '" + to_parse + "'") as sub_context:
             sub_context.add_inputs(to_parse=to_parse)
             python_parser = PythonParser()
-            result = python_parser.parse(sub_context, to_parse)
+            parser_input = context.sheerka.services[SheerkaExecute.NAME].get_parser_input(to_parse)
+            result = python_parser.parse(sub_context, parser_input)
             sub_context.add_values(return_values=result)

             if result.status:
@@ -216,3 +147,75 @@
                 return python_node

         return result.body  # the error
+
+
+class PythonParser(BaseParser):
+    """
+    Parse Python scripts
+    """
+
+    def __init__(self, **kwargs):
+
+        BaseParser.__init__(self, "Python", 50)
+        self.source = kwargs.get("source", "<undef>")
+
+    def parse(self, context, parser_input: ParserInput):
+        sheerka = context.sheerka
+        tree = None
+        tracker = {}  # to keep track of concept tokens (c:xxx:)
+
+        python_switcher = {
+            TokenKind.CONCEPT: lambda t: core.utils.encode_concept(t.value)
+        }
+
+        try:
+            parser_input.reset()
+
+            source_code = parser_input.as_text(python_switcher, tracker)
+            source_code = source_code.strip()
+
+            # first, try to parse an expression
+            res, tree, error = self.try_parse_expression(source_code)
+            if not res:
+                # then try to parse a statement
+                res, tree, error = self.try_parse_statement(source_code)
+                if not res:
+                    error_node = PythonErrorNode(parser_input.as_text(), error)
+                    self.error_sink.append(error_node)
+
+        except LexerError as e:
+            self.error_sink.append(e)
+
+        if self.has_error:
+            ret = sheerka.ret(
+                self.name,
+                False,
+                sheerka.new(
+                    BuiltinConcepts.NOT_FOR_ME,
+                    body=parser_input.as_text(),
+                    reason=self.error_sink))
+        else:
+            ret = sheerka.ret(
+                self.name,
+                True,
+                sheerka.new(
+                    BuiltinConcepts.PARSER_RESULT,
+                    parser=self,
+                    source=parser_input.as_text(),
+                    body=PythonNode(source_code, tree, tracker),
+                    try_parsed=None))
+
+        self.log_result(context, parser_input.as_text(), ret)
+        return ret
+
+    def try_parse_expression(self, text):
+        try:
+            return True, ast.parse(text, f"<{self.source}>", 'eval'), None
+        except Exception as error:
+            return False, None, error
+
+    def try_parse_statement(self, text):
+        try:
+            return True, ast.parse(text, f"<{self.source}>", 'exec'), None
+        except Exception as error:
+            return False, None, error

@@ -1,4 +1,5 @@
 from core.builtin_concepts import BuiltinConcepts
+from core.sheerka.services.SheerkaExecute import SheerkaExecute
 from parsers.BaseParser import BaseParser
 from parsers.BnfNodeParser import ConceptNode
 from parsers.PythonParser import PythonParser
@@ -77,8 +78,9 @@ class PythonWithConceptsParser(BaseParser):
             to_parse += node.source

         with context.push(self, "Trying Python for '" + to_parse + "'") as sub_context:
+            parser_input = context.sheerka.services[SheerkaExecute.NAME].get_parser_input(to_parse)
             python_parser = PythonParser()
-            result = python_parser.parse(sub_context, to_parse)
+            result = python_parser.parse(sub_context, parser_input)

             if result.status:
                 python_node = result.body.body

@@ -6,7 +6,7 @@ from typing import List
 from core import builtin_helpers
 from core.builtin_concepts import BuiltinConcepts
 from core.concept import Concept, DEFINITION_TYPE_BNF
-from core.sheerka.ExecutionContext import ExecutionContext
+from core.sheerka.services.SheerkaExecute import ParserInput
 from core.tokenizer import Token, TokenKind, Tokenizer
 from parsers.BaseNodeParser import UnrecognizedTokensNode, ConceptNode, SourceCodeNode, SyaAssociativity, \
     SourceCodeWithConceptNode, BaseNodeParser
@@ -313,21 +313,6 @@ class InFixToPostFix:
         """
         return len(self.stack) > 0 and isinstance(self.stack[-1], type)

-    def _get_lexer_nodes_from_unrecognized(self):
-        """
-        Use the source of self.unrecognized_tokens gto find concepts or source code
-        :return:
-        """
-
-        res = builtin_helpers.parse_unrecognized(self.context, self.unrecognized_tokens.source, PARSERS)
-        only_parsers_results = builtin_helpers.only_parsers_results(self.context, res)
-        if not only_parsers_results.status:
-            return None
-
-        return builtin_helpers.get_lexer_nodes(
-            only_parsers_results.body.body,
-            self.unrecognized_tokens.start,
-            self.unrecognized_tokens.tokens)
-
     def _make_source_code_with_concept(self, start, rpar_token, end):
         """
@@ -440,7 +425,10 @@ class InFixToPostFix:
         self.unrecognized_tokens.fix_source()

         # try to recognize concepts
-        nodes_sequences = self._get_lexer_nodes_from_unrecognized()
+        nodes_sequences = builtin_helpers.get_lexer_nodes_from_unrecognized(
+            self.context,
+            self.unrecognized_tokens,
+            PARSERS)

         if nodes_sequences:
             # more than one solution may have been found
@@ -482,7 +470,10 @@ class InFixToPostFix:
         :return: list of function_parser_res
         """
         self.unrecognized_tokens.fix_source()
-        nodes_sequences = self._get_lexer_nodes_from_unrecognized()
+        nodes_sequences = builtin_helpers.get_lexer_nodes_from_unrecognized(
+            self.context,
+            self.unrecognized_tokens,
+            PARSERS)
         if nodes_sequences is None:
             return None

@@ -908,13 +899,13 @@ class SyaNodeParser(BaseNodeParser):
         self.concepts_by_first_keyword = {}
         self.sya_definitions = {}

-        self.token = None
-        self.pos = -1
-        self.tokens = None
-
-        self.context: ExecutionContext = None
-        self.text = None
-        self.sheerka = None
+        # self.token = None
+        # self.pos = -1
+        # self.tokens = None
+        #
+        # self.context: ExecutionContext = None
+        # self.text = None
+        # self.sheerka = None

     def init_from_concepts(self, context, concepts, **kwargs):
         super().init_from_concepts(context, concepts)
@@ -954,15 +945,15 @@ class SyaNodeParser(BaseNodeParser):

         return sya_concept_def

-    def infix_to_postfix(self, context, text):
+    def infix_to_postfix(self, context, parser_input: ParserInput):
         """
         Implements the Shunting Yard algorithm
         :param context:
-        :param text:
+        :param parser_input:
         :return:
         """

-        if not self.reset_parser(context, text):
+        if not self.reset_parser(context, parser_input):
             return None

         forked = []
@@ -978,32 +969,32 @@ class SyaNodeParser(BaseNodeParser):
                 forked.clear()

         res = [InFixToPostFix(context)]
-        while self.next_token(False):
+        while self.parser_input.next_token(False):
             for infix_to_postfix in res:
                 infix_to_postfix.reset()

-            token = self.get_token()
+            token = self.parser_input.token

             try:
                 if token.type in (TokenKind.LPAR, TokenKind.RPAR):
                     # small optimization: no need to lock, unlock or get the concept for parentheses
                     for infix_to_postfix in res:
-                        infix_to_postfix.eat_token(token, self.pos)
+                        infix_to_postfix.eat_token(token, self.parser_input.pos)
                     continue

                 for infix_to_postfix in res:
-                    if infix_to_postfix.eat_token(token, self.pos):
+                    if infix_to_postfix.eat_token(token, self.parser_input.pos):
                         infix_to_postfix.lock()

                 concepts = self.get_concepts(token, self._is_eligible, to_map=self._get_sya_concept_def)
                 if not concepts:
                     for infix_to_postfix in res:
-                        infix_to_postfix.eat_unrecognized(token, self.pos)
+                        infix_to_postfix.eat_unrecognized(token, self.parser_input.pos)
                     continue

                 if len(concepts) == 1:
                     for infix_to_postfix in res:
-                        infix_to_postfix.eat_concept(concepts[0], token, self.pos)
+                        infix_to_postfix.eat_concept(concepts[0], token, self.parser_input.pos)
                     continue

                 # make the cartesian product
@@ -1012,7 +1003,7 @@ class SyaNodeParser(BaseNodeParser):
                     for concept in concepts:
                         clone = infix_to_postfix.clone()
                         temp_res.append(clone)
-                        clone.eat_concept(concept, token, self.pos)
+                        clone.eat_concept(concept, token, self.parser_input.pos)
                 res = temp_res

             finally:
@@ -1036,14 +1027,15 @@ class SyaNodeParser(BaseNodeParser):
         while len(item.nodes) > 0:
             res = self.postfix_to_item(sheerka, item.nodes)
             if isinstance(res, PostFixToItem):
-                items.append(ConceptNode(res.concept, res.start, res.end, self.tokens[res.start: res.end + 1]))
+                items.append(
+                    ConceptNode(res.concept, res.start, res.end, self.parser_input.tokens[res.start: res.end + 1]))
             else:
                 items.append(res)
             item.has_unrecognized |= hasattr(res, "has_unrecognized") and res.has_unrecognized or \
                                      isinstance(res, UnrecognizedTokensNode)
         item.nodes = items
         item.fix_all_pos()
-        item.tokens = self.tokens[item.start:item.end + 1]
+        item.tokens = self.parser_input.tokens[item.start:item.end + 1]
         item.fix_source(True)
         return item

@@ -1069,14 +1061,14 @@ class SyaNodeParser(BaseNodeParser):

         return PostFixToItem(concept, start, end, has_unrecognized)

-    def parse(self, context, parser_input):
+    def parse(self, context, parser_input: ParserInput):
         """

         :param context:
         :param parser_input:
         :return:
         """
-        if parser_input == "":
+        if parser_input.is_empty():
             return context.sheerka.ret(
                 self.name,
                 False,
@@ -1096,7 +1088,7 @@ class SyaNodeParser(BaseNodeParser):
             return self.sheerka.ret(
                 self.name,
                 False,
-                context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input))
+                context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.as_text()))

         for infix_to_postfix in valid_infix_to_postfixs:
             sequence = []
@@ -1106,7 +1098,10 @@ class SyaNodeParser(BaseNodeParser):
             has_unrecognized |= hasattr(item, "has_unrecognized") and item.has_unrecognized or \
                                 isinstance(item, UnrecognizedTokensNode)
             if isinstance(item, PostFixToItem):
-                to_insert = ConceptNode(item.concept, item.start, item.end, self.tokens[item.start: item.end + 1])
+                to_insert = ConceptNode(item.concept,
+                                        item.start,
+                                        item.end,
+                                        self.parser_input.tokens[item.start: item.end + 1])
             else:
                 to_insert = item
             sequence.insert(0, to_insert)

@@ -109,7 +109,7 @@ class UnrecognizedNodeParser(BaseParser):
             _validate_concept(value)

         elif isinstance(value, UnrecognizedTokensNode):
-            res = parse_unrecognized(context, value.tokens, PARSERS)
+            res = parse_unrecognized(context, value.source, PARSERS)
             res = only_successful(context, res)  # only keep successful parsers
             if res.status:
                 concept.compiled[name] = res.body.body