Refactored Caching, Refactored BnfNodeParser, Introduced Sphinx
This commit is contained in:
+166
-118
@@ -1,4 +1,3 @@
|
||||
import copy
|
||||
from collections import namedtuple
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List
|
||||
@@ -7,10 +6,10 @@ from core import builtin_helpers
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF
|
||||
from core.sheerka.ExecutionContext import ExecutionContext
|
||||
from core.tokenizer import LexerError, Token, TokenKind
|
||||
from core.tokenizer import Token, TokenKind
|
||||
from parsers.BaseNodeParser import UnrecognizedTokensNode, ConceptNode, SourceCodeNode, SyaAssociativity, \
|
||||
SourceCodeWithConceptNode
|
||||
from parsers.BaseParser import BaseParser, ErrorNode
|
||||
SourceCodeWithConceptNode, BaseNodeParser
|
||||
from parsers.BaseParser import ErrorNode, UnexpectedTokenErrorNode
|
||||
|
||||
PARSERS = ["BnfNode", "AtomNode", "Python"]
|
||||
|
||||
@@ -116,13 +115,13 @@ class SyaConceptParserHelper:
|
||||
return len(self.expected) == 0
|
||||
|
||||
def is_atom(self):
|
||||
return len(self.concept.concept.metadata.props) == 0 and len(self.expected) == 0
|
||||
return len(self.concept.concept.metadata.variables) == 0 and len(self.expected) == 0
|
||||
|
||||
def is_expected(self, token):
|
||||
if self.is_matched():
|
||||
return False
|
||||
|
||||
token_value = self._get_token_value(token)
|
||||
token_value = BaseNodeParser.get_token_value(token)
|
||||
|
||||
for expected in self.expected:
|
||||
if not expected.startswith(VARIABLE_PREFIX) and expected == token_value:
|
||||
@@ -139,7 +138,7 @@ class SyaConceptParserHelper:
|
||||
|
||||
# return True if a whole sequence of keywords is eaten
|
||||
# example
|
||||
# Concept("foo a bar baz qux b").def_prop("a").def_prop("b")
|
||||
# Concept("foo a bar baz qux b").def_var("a").def_var("b")
|
||||
# 'bar' is just eaten. We will return False because 'baz' and 'qux' are still waiting
|
||||
if len(self.expected) == 0:
|
||||
return True
|
||||
@@ -169,14 +168,14 @@ class SyaConceptParserHelper:
|
||||
self.concept = self.concept.concept
|
||||
return self
|
||||
|
||||
@staticmethod
|
||||
def _get_token_value(token):
|
||||
if token.type == TokenKind.STRING:
|
||||
return token.value[1:-1]
|
||||
elif token.type == TokenKind.KEYWORD:
|
||||
return token.value.value
|
||||
else:
|
||||
return token.value
|
||||
# @staticmethod
|
||||
# def _get_token_value(token):
|
||||
# if token.type == TokenKind.STRING:
|
||||
# return token.value[1:-1]
|
||||
# elif token.type == TokenKind.KEYWORD:
|
||||
# return token.value.value
|
||||
# else:
|
||||
# return token.value
|
||||
|
||||
def clone(self):
|
||||
clone = SyaConceptParserHelper(self.concept, self.start, self.end)
|
||||
@@ -215,7 +214,10 @@ class InFixToPostFix:
|
||||
if not isinstance(other, InFixToPostFix):
|
||||
return False
|
||||
|
||||
return self.out == other.out
|
||||
return self.out == other.out and self.errors == other.errors
|
||||
|
||||
def __hash__(self):
|
||||
return len(self.sequence) + len(self.errors)
|
||||
|
||||
def _add_error(self, error):
|
||||
self.errors.append(error)
|
||||
@@ -396,6 +398,7 @@ class InFixToPostFix:
|
||||
del current_concept.expected[0]
|
||||
|
||||
def manage_unrecognized(self):
|
||||
|
||||
if self.unrecognized_tokens.is_empty():
|
||||
return
|
||||
|
||||
@@ -514,10 +517,10 @@ class InFixToPostFix:
|
||||
def handle_expected_token(self, token, pos):
|
||||
"""
|
||||
True if the token is part of the concept being parsed and the last token in a sequence is eaten
|
||||
Example : Concept("foo a bar b").def_prop("a").def_prop("b")
|
||||
Example : Concept("foo a bar b").def_var("a").def_var("b")
|
||||
The expected tokens are 'foo' and 'bar' (as a and b are parameters)
|
||||
|
||||
Example: Concept("foo a bar baz b").def_prop("a").def_prop("b")
|
||||
Example: Concept("foo a bar baz b").def_var("a").def_var("b")
|
||||
If the token is 'bar', it will be eaten but handle_expected_token() will return False
|
||||
as we still expect 'baz'
|
||||
:param token:
|
||||
@@ -565,6 +568,18 @@ class InFixToPostFix:
|
||||
|
||||
return True
|
||||
|
||||
# else:
|
||||
# if token.type != TokenKind.WHITESPACE:
|
||||
# # hack, because whitespaces are not correctly parsed in self.expected
|
||||
# # KSI 2020/04/25
|
||||
# # I no longer understand why we are in a loop (the reverse one)
|
||||
# # if we are parsing a concept and the expected token does not match
|
||||
# # The whole class should be in error
|
||||
# self._add_error(UnexpectedTokenErrorNode(
|
||||
# f"Failed to parse '{current_concept.concept.concept}'",
|
||||
# token, current_concept.expected))
|
||||
# return False
|
||||
|
||||
return False
|
||||
|
||||
def eat_token(self, token, pos):
|
||||
@@ -581,7 +596,7 @@ class InFixToPostFix:
|
||||
|
||||
if self.handle_expected_token(token, pos):
|
||||
# a token is found, let's check if it's part of a concept being parsed
|
||||
# example Concept(name="foo", definition="foo a bar b").def_prop("a").def_prop("b")
|
||||
# example Concept(name="foo", definition="foo a bar b").def_var("a").def_var("b")
|
||||
# if the token 'bar' is found, it has to be considered as part of the concept foo
|
||||
self.debug.append(token)
|
||||
return True
|
||||
@@ -780,16 +795,13 @@ class PostFixToItem:
|
||||
has_unrecognized: bool
|
||||
|
||||
|
||||
class SyaNodeParser(BaseParser):
|
||||
class SyaNodeParser(BaseNodeParser):
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
BaseParser.__init__(self, "SyaNode", 50)
|
||||
super().__init__("SyaNode", 50, **kwargs)
|
||||
if 'sheerka' in kwargs:
|
||||
sheerka = kwargs.get("sheerka")
|
||||
self.concepts_by_first_keyword = sheerka.concepts_by_first_keyword
|
||||
self.sya_definitions = {}
|
||||
if sheerka.sya_definitions:
|
||||
for k, v in sheerka.sya_definitions.items():
|
||||
self.sya_definitions[k] = (v[0], SyaAssociativity(v[1]))
|
||||
self.sya_definitions = sheerka.resolved_sya_def
|
||||
|
||||
else:
|
||||
self.concepts_by_first_keyword = {}
|
||||
@@ -803,104 +815,133 @@ class SyaNodeParser(BaseParser):
|
||||
self.text = None
|
||||
self.sheerka = None
|
||||
|
||||
def reset_parser(self, context, text):
|
||||
self.context = context
|
||||
self.sheerka = context.sheerka
|
||||
self.text = text
|
||||
|
||||
try:
|
||||
self.tokens = list(self.get_input_as_tokens(text))
|
||||
except LexerError as e:
|
||||
self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
|
||||
return False
|
||||
|
||||
self.token = None
|
||||
self.pos = -1
|
||||
return True
|
||||
|
||||
def add_error(self, error, next_token=True):
|
||||
self.error_sink.append(error)
|
||||
if next_token:
|
||||
self.next_token()
|
||||
return error
|
||||
|
||||
def get_token(self) -> Token:
|
||||
return self.token
|
||||
|
||||
def next_token(self, skip_whitespace=True):
|
||||
if self.token and self.token.type == TokenKind.EOF:
|
||||
return False
|
||||
|
||||
self.pos += 1
|
||||
self.token = self.tokens[self.pos]
|
||||
|
||||
if skip_whitespace:
|
||||
while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
|
||||
self.pos += 1
|
||||
self.token = self.tokens[self.pos]
|
||||
|
||||
return self.token.type != TokenKind.EOF
|
||||
|
||||
def initialize(self, context, concepts=None, sya_definitions=None):
|
||||
self.context = context
|
||||
self.sheerka = context.sheerka
|
||||
def init_from_concepts(self, context, concepts, **kwargs):
|
||||
super().init_from_concepts(context, concepts)
|
||||
|
||||
sya_definitions = kwargs.get("sya", None)
|
||||
if sya_definitions:
|
||||
self.sya_definitions = sya_definitions
|
||||
|
||||
if concepts:
|
||||
for concept in concepts:
|
||||
keywords = concept.key.split()
|
||||
for keyword in keywords:
|
||||
if keyword.startswith(VARIABLE_PREFIX):
|
||||
continue
|
||||
|
||||
self.concepts_by_first_keyword.setdefault(keyword, []).append(concept.id)
|
||||
break
|
||||
|
||||
return self.sheerka.ret(self.name, True, self.concepts_by_first_keyword)
|
||||
|
||||
def get_concepts(self, token):
|
||||
@staticmethod
|
||||
def _is_eligible(concept):
|
||||
"""
|
||||
Tries to find if there are concepts that match the value of the token
|
||||
:param token:
|
||||
Predicate that selects the concepts that must be handled by this parser (SyaNodeParser)
|
||||
:param concept:
|
||||
:return:
|
||||
"""
|
||||
# We only accept concepts that have parameters (refuse atoms)
|
||||
# Bnf definitions are not supposed to be managed by this parser either
|
||||
return len(concept.metadata.variables) > 0 and concept.metadata.definition_type != DEFINITION_TYPE_BNF
|
||||
|
||||
if token.type == TokenKind.STRING:
|
||||
name = token.value[1:-1]
|
||||
elif token.type == TokenKind.KEYWORD:
|
||||
name = token.value.value
|
||||
else:
|
||||
name = token.value
|
||||
@staticmethod
def _get_sya_concept_def(parser, concept):
    """
    Wrap a concept in a SyaConceptDef and apply the override registered for it, if any.

    :param parser: object exposing a ``sya_definitions`` mapping keyed by concept id
    :param concept: concept to wrap; ``concept.id`` is the lookup key
    :return: the SyaConceptDef built for ``concept``
    """
    wrapped = SyaConceptDef(concept)
    if concept.id not in parser.sya_definitions:
        # Nothing registered for this concept: return the wrapper as built.
        return wrapped

    # The entry is read by index: [0] is the precedence, [1] the associativity.
    # A None slot leaves the corresponding attribute of the wrapper untouched.
    override = parser.sya_definitions.get(concept.id)
    for index, attribute in ((0, "precedence"), (1, "associativity")):
        value = override[index]
        if value is not None:
            setattr(wrapped, attribute, value)
    return wrapped
|
||||
|
||||
result = []
|
||||
if name in self.concepts_by_first_keyword:
|
||||
for concept_id in self.concepts_by_first_keyword[name]:
|
||||
# def reset_parser(self, context, text):
|
||||
# self.context = context
|
||||
# self.sheerka = context.sheerka
|
||||
# self.text = text
|
||||
#
|
||||
# try:
|
||||
# self.tokens = list(self.get_input_as_tokens(text))
|
||||
# except LexerError as e:
|
||||
# self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
|
||||
# return False
|
||||
#
|
||||
# self.token = None
|
||||
# self.pos = -1
|
||||
# return True
|
||||
#
|
||||
# def add_error(self, error, next_token=True):
|
||||
# self.error_sink.append(error)
|
||||
# if next_token:
|
||||
# self.next_token()
|
||||
# return error
|
||||
#
|
||||
# def get_token(self) -> Token:
|
||||
# return self.token
|
||||
#
|
||||
# def next_token(self, skip_whitespace=True):
|
||||
# if self.token and self.token.type == TokenKind.EOF:
|
||||
# return False
|
||||
#
|
||||
# self.pos += 1
|
||||
# self.token = self.tokens[self.pos]
|
||||
#
|
||||
# if skip_whitespace:
|
||||
# while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
|
||||
# self.pos += 1
|
||||
# self.token = self.tokens[self.pos]
|
||||
#
|
||||
# return self.token.type != TokenKind.EOF
|
||||
|
||||
concept = self.sheerka.get_by_id(concept_id)
|
||||
|
||||
if len(concept.metadata.props) == 0:
|
||||
# only concepts that has parameter (refuse atoms)
|
||||
# Note that this test is needed if the definition of the concept has changed
|
||||
continue
|
||||
|
||||
if concept.metadata.definition_type == DEFINITION_TYPE_BNF:
|
||||
# bnf definitions are not supposed to be managed by this parser
|
||||
continue
|
||||
|
||||
sya_concept_def = SyaConceptDef(concept)
|
||||
if concept.id in self.sya_definitions:
|
||||
sya_def = self.sya_definitions[concept.id]
|
||||
if sya_def[0] is not None:
|
||||
sya_concept_def.precedence = sya_def[0]
|
||||
if sya_def[1] is not None:
|
||||
sya_concept_def.associativity = sya_def[1]
|
||||
|
||||
result.append(sya_concept_def)
|
||||
return result
|
||||
|
||||
return None
|
||||
# def initialize(self, context, concepts=None, sya_definitions=None):
|
||||
# self.context = context
|
||||
# self.sheerka = context.sheerka
|
||||
#
|
||||
# if sya_definitions:
|
||||
# self.sya_definitions = sya_definitions
|
||||
#
|
||||
# if concepts:
|
||||
# for concept in concepts:
|
||||
# keywords = concept.key.split()
|
||||
# for keyword in keywords:
|
||||
# if keyword.startswith(VARIABLE_PREFIX):
|
||||
# continue
|
||||
#
|
||||
# self.concepts_by_first_keyword.setdefault(keyword, []).append(concept.id)
|
||||
# break
|
||||
#
|
||||
# return self.sheerka.ret(self.name, True, self.concepts_by_first_keyword)
|
||||
#
|
||||
# def get_concepts(self, token):
|
||||
# """
|
||||
# Tries to find if there are concepts that match the value of the token
|
||||
# :param token:
|
||||
# :return:
|
||||
# """
|
||||
#
|
||||
# if token.type == TokenKind.STRING:
|
||||
# name = token.value[1:-1]
|
||||
# elif token.type == TokenKind.KEYWORD:
|
||||
# name = token.value.value
|
||||
# else:
|
||||
# name = token.value
|
||||
#
|
||||
# result = []
|
||||
# if name in self.concepts_by_first_keyword:
|
||||
# for concept_id in self.concepts_by_first_keyword[name]:
|
||||
#
|
||||
# concept = self.sheerka.get_by_id(concept_id)
|
||||
#
|
||||
# if len(concept.metadata.props) == 0:
|
||||
# # only concepts that has parameter (refuse atoms)
|
||||
# # Note that this test is needed if the definition of the concept has changed
|
||||
# continue
|
||||
#
|
||||
# if concept.metadata.definition_type == DEFINITION_TYPE_BNF:
|
||||
# # bnf definitions are not supposed to be managed by this parser
|
||||
# continue
|
||||
#
|
||||
# sya_concept_def = SyaConceptDef(concept)
|
||||
# if concept.id in self.sya_definitions:
|
||||
# sya_def = self.sya_definitions[concept.id]
|
||||
# if sya_def[0] is not None:
|
||||
# sya_concept_def.precedence = sya_def[0]
|
||||
# if sya_def[1] is not None:
|
||||
# sya_concept_def.associativity = sya_def[1]
|
||||
#
|
||||
# result.append(sya_concept_def)
|
||||
# return result
|
||||
#
|
||||
# return None
|
||||
|
||||
def infix_to_postfix(self, context, text):
|
||||
"""
|
||||
@@ -943,7 +984,7 @@ class SyaNodeParser(BaseParser):
|
||||
if infix_to_postfix.eat_token(token, self.pos):
|
||||
infix_to_postfix.lock()
|
||||
|
||||
concepts = self.get_concepts(token)
|
||||
concepts = self.get_concepts(token, self._is_eligible, to_map=self._get_sya_concept_def)
|
||||
if not concepts:
|
||||
for infix_to_postfix in res:
|
||||
infix_to_postfix.eat_unrecognized(token, self.pos)
|
||||
@@ -988,7 +1029,7 @@ class SyaNodeParser(BaseParser):
|
||||
else:
|
||||
items.append(res)
|
||||
item.has_unrecognized |= hasattr(res, "has_unrecognized") and res.has_unrecognized or \
|
||||
isinstance(res, UnrecognizedTokensNode)
|
||||
isinstance(res, UnrecognizedTokensNode)
|
||||
item.nodes = items
|
||||
item.fix_all_pos()
|
||||
item.tokens = self.tokens[item.start:item.end + 1]
|
||||
@@ -1000,7 +1041,7 @@ class SyaNodeParser(BaseParser):
|
||||
end = item.end
|
||||
has_unrecognized = False
|
||||
concept = sheerka.new_from_template(item.concept, item.concept.id)
|
||||
for param_index in reversed(range(len(concept.metadata.props))):
|
||||
for param_index in reversed(range(len(concept.metadata.variables))):
|
||||
inner_item = self.postfix_to_item(sheerka, postfixed)
|
||||
if inner_item.start < start:
|
||||
start = inner_item.start
|
||||
@@ -1008,7 +1049,7 @@ class SyaNodeParser(BaseParser):
|
||||
end = inner_item.end
|
||||
has_unrecognized |= isinstance(inner_item, UnrecognizedTokensNode)
|
||||
|
||||
param_name = concept.metadata.props[param_index][0]
|
||||
param_name = concept.metadata.variables[param_index][0]
|
||||
param_value = inner_item.concept if hasattr(inner_item, "concept") else \
|
||||
[inner_item.return_value] if isinstance(inner_item, SourceCodeNode) else \
|
||||
inner_item
|
||||
@@ -1115,3 +1156,10 @@ class SyaNodeParser(BaseParser):
|
||||
result.append(infix_to_postfix)
|
||||
|
||||
return result
|
||||
|
||||
# @staticmethod
|
||||
# def init_sheerka(self, sheerka):
|
||||
# if hasattr(BaseNodeParser, "init_sheerka"):
|
||||
# BaseNodeParser.init_sheerka(sheerka)
|
||||
#
|
||||
# # init sya definitions
|
||||
|
||||
Reference in New Issue
Block a user