Refactored Caching, Refactored BnfNodeParser, Introduced Sphinx
This commit is contained in:
+195
-65
@@ -2,8 +2,9 @@ from collections import namedtuple
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
|
||||
import core.utils
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.concept import VARIABLE_PREFIX, Concept
|
||||
from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF, ConceptParts
|
||||
from core.sheerka.ExecutionContext import ExecutionContext
|
||||
from core.tokenizer import TokenKind, LexerError, Token
|
||||
from parsers.BaseParser import Node, BaseParser, ErrorNode
|
||||
@@ -187,6 +188,9 @@ class SourceCodeNode(LexerNode):
|
||||
self.end == other.end and \
|
||||
self.source == other.source
|
||||
|
||||
if isinstance(other, SCN):
|
||||
return other == self
|
||||
|
||||
if not isinstance(other, SourceCodeNode):
|
||||
return False
|
||||
|
||||
@@ -352,6 +356,51 @@ class HelperWithPos:
|
||||
return self
|
||||
|
||||
|
||||
class SCN(HelperWithPos):
|
||||
"""
|
||||
SourceCodeNode tester class
|
||||
It matches with SourceCodeNode but with less constraints
|
||||
|
||||
SCN == SourceCodeNode if source, start, end (start and end are not validated when None)
|
||||
"""
|
||||
|
||||
def __init__(self, source, start=None, end=None):
|
||||
super().__init__(start, end)
|
||||
self.source = source
|
||||
|
||||
def __eq__(self, other):
|
||||
if id(self) == id(other):
|
||||
return True
|
||||
|
||||
if isinstance(other, SourceCodeNode):
|
||||
if self.source != other.source:
|
||||
return False
|
||||
if self.start is not None and self.start != other.start:
|
||||
return False
|
||||
if self.end is not None and self.end != other.end:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
if not isinstance(other, CN):
|
||||
return False
|
||||
|
||||
return self.source == other.source and \
|
||||
self.start == other.start and \
|
||||
self.end == other.end
|
||||
|
||||
def __hash__(self):
|
||||
return hash((self.source, self.start, self.end))
|
||||
|
||||
def __repr__(self):
|
||||
txt = f"SCN(source='{self.source}'"
|
||||
if self.start is not None:
|
||||
txt += f", start={self.start}"
|
||||
if self.end is not None:
|
||||
txt += f", end={self.end}"
|
||||
return txt + ")"
|
||||
|
||||
|
||||
class CN(HelperWithPos):
|
||||
"""
|
||||
ConceptNode tester class
|
||||
@@ -390,6 +439,8 @@ class CN(HelperWithPos):
|
||||
return False
|
||||
if self.end is not None and self.end != other.end:
|
||||
return False
|
||||
if self.source is not None and self.source != other.source:
|
||||
return False
|
||||
return True
|
||||
|
||||
if not isinstance(other, CN):
|
||||
@@ -425,9 +476,10 @@ class CNC(CN):
|
||||
CNC == ConceptNode if CNC.compiled == ConceptNode.concept.compiled
|
||||
"""
|
||||
|
||||
def __init__(self, concept_key, start=None, end=None, source=None, **kwargs):
|
||||
def __init__(self, concept_key, start=None, end=None, source=None, exclude_body=False, **kwargs):
|
||||
super().__init__(concept_key, start, end, source)
|
||||
self.compiled = kwargs
|
||||
self.exclude_body = exclude_body
|
||||
|
||||
def __eq__(self, other):
|
||||
if id(self) == id(other):
|
||||
@@ -442,7 +494,13 @@ class CNC(CN):
|
||||
return False
|
||||
if self.end is not None and self.end != other.end:
|
||||
return False
|
||||
return self.compiled == other.concept.compiled # assert instead of return to help debugging tests
|
||||
if self.source is not None and self.source != other.source:
|
||||
return False
|
||||
if self.exclude_body:
|
||||
to_compare = {k: v for k, v in other.concept.compiled.items() if k != ConceptParts.BODY}
|
||||
else:
|
||||
to_compare = other.concept.compiled
|
||||
return self.compiled == to_compare
|
||||
|
||||
if not isinstance(other, CNC):
|
||||
return False
|
||||
@@ -518,11 +576,10 @@ class BaseNodeParser(BaseParser):
|
||||
super().__init__(name, priority)
|
||||
if 'sheerka' in kwargs:
|
||||
sheerka = kwargs.get("sheerka")
|
||||
self.init_from_sheerka(sheerka)
|
||||
self.concepts_by_first_keyword = sheerka.resolved_concepts_by_first_keyword
|
||||
|
||||
else:
|
||||
self.concepts_by_first_keyword = None
|
||||
self.sya_definitions = None
|
||||
|
||||
self.token = None
|
||||
self.pos = -1
|
||||
@@ -532,17 +589,16 @@ class BaseNodeParser(BaseParser):
|
||||
self.text = None
|
||||
self.sheerka = None
|
||||
|
||||
def init_from_sheerka(self, sheerka):
|
||||
def init_from_concepts(self, context, concepts, **kwargs):
|
||||
"""
|
||||
Use the definitons from Sheerka to initialize
|
||||
:param sheerka:
|
||||
Initialize the parser with a list of concepts
|
||||
For unit tests convenience
|
||||
:param context
|
||||
:param concepts
|
||||
:return:
|
||||
"""
|
||||
self.concepts_by_first_keyword = sheerka.concepts_by_first_keyword
|
||||
if sheerka.sya_definitions:
|
||||
self.sya_definitions = {}
|
||||
for k, v in sheerka.sya_definitions.items():
|
||||
self.sya_definitions[k] = (v[0], SyaAssociativity(v[1]))
|
||||
concepts_by_first_keyword = self.get_concepts_by_first_keyword(context, concepts).body
|
||||
self.concepts_by_first_keyword = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword).body
|
||||
|
||||
def reset_parser(self, context, text):
|
||||
self.context = context
|
||||
@@ -582,82 +638,43 @@ class BaseNodeParser(BaseParser):
|
||||
|
||||
return self.token.type != TokenKind.EOF
|
||||
|
||||
def initialize(self, context, concepts, sya_definitions=None, use_sheerka=False):
|
||||
"""
|
||||
To quickly find a concept, we store them in an hash where the key is the first token of the concept
|
||||
example :
|
||||
Concept("foo a").def_prop("a"), "foo" is a token, "a" is a variable
|
||||
So the key to use will be "foo"
|
||||
|
||||
Concept("a foo").def_prop("a") -> first token is "foo"
|
||||
|
||||
Concept("Hello my dear a").def_prop("a") -> first token is "Hello"
|
||||
Note that under the same key, there will be multiple entry
|
||||
a B-Tree may be a better implementation in the future
|
||||
|
||||
We also store sya_definition which a is tuple (concept_precedence:int, concept_associativity:SyaAssociativity)
|
||||
:param context:
|
||||
:param concepts: list[Concept]
|
||||
:param sya_definitions: hash[concept_id, tuple(precedence:int, associativity:SyaAssociativity)]
|
||||
:param use_sheerka: first init with the definitions from Sheerka
|
||||
:return:
|
||||
"""
|
||||
self.context = context
|
||||
self.sheerka = context.sheerka
|
||||
|
||||
if use_sheerka:
|
||||
self.init_from_sheerka(self.sheerka)
|
||||
|
||||
if sya_definitions:
|
||||
if self.sya_definitions:
|
||||
self.sya_definitions.update(sya_definitions)
|
||||
else:
|
||||
self.sya_definitions = sya_definitions
|
||||
|
||||
if self.concepts_by_first_keyword is None:
|
||||
self.concepts_by_first_keyword = {}
|
||||
|
||||
for concept in concepts:
|
||||
keywords = concept.key.split()
|
||||
for keyword in keywords:
|
||||
if keyword.startswith(VARIABLE_PREFIX):
|
||||
continue
|
||||
|
||||
self.concepts_by_first_keyword.setdefault(keyword, []).append(concept.id)
|
||||
break
|
||||
|
||||
return self.sheerka.ret(self.name, True, self.concepts_by_first_keyword)
|
||||
|
||||
def get_concepts(self, token, to_keep, to_map=None):
|
||||
def get_concepts(self, token, to_keep, custom=None, to_map=None, strip_quotes=False):
|
||||
"""
|
||||
Tries to find if there are concepts that match the value of the token
|
||||
:param token:
|
||||
:param to_keep: predicate to tell if the concept is eligible
|
||||
:param custom: lambda name -> List[Concepts] that gives extra concepts, according to the name
|
||||
:param to_map:
|
||||
:param strip_quotes: Remove quotes from strings
|
||||
:return:
|
||||
"""
|
||||
|
||||
if token.type == TokenKind.WHITESPACE:
|
||||
return None
|
||||
|
||||
if token.type == TokenKind.STRING:
|
||||
name = token.value[1:-1]
|
||||
name = token.value[1:-1] if strip_quotes else token.value
|
||||
elif token.type == TokenKind.KEYWORD:
|
||||
name = token.value.value
|
||||
else:
|
||||
name = token.value
|
||||
|
||||
custom_concepts = custom(name) if custom else []
|
||||
|
||||
result = []
|
||||
if name in self.concepts_by_first_keyword:
|
||||
for concept_id in self.concepts_by_first_keyword[name]:
|
||||
for concept_id in self.concepts_by_first_keyword.get(name):
|
||||
|
||||
concept = self.sheerka.get_by_id(concept_id)
|
||||
|
||||
if not to_keep(concept):
|
||||
continue
|
||||
|
||||
concept = to_map(concept) if to_map else concept
|
||||
concept = to_map(self, concept) if to_map else concept
|
||||
result.append(concept)
|
||||
return result
|
||||
return result + custom_concepts
|
||||
|
||||
return None
|
||||
return custom_concepts if custom else None
|
||||
|
||||
@staticmethod
|
||||
def get_token_value(token):
|
||||
@@ -667,3 +684,116 @@ class BaseNodeParser(BaseParser):
|
||||
return token.value.value
|
||||
else:
|
||||
return token.value
|
||||
|
||||
@staticmethod
|
||||
def get_concepts_by_first_keyword(context, concepts, use_sheerka=False):
|
||||
"""
|
||||
Create the map describing the first token expected by a concept
|
||||
:param context:
|
||||
:param concepts: lists of concepts to parse
|
||||
:param use_sheerka: if True, update concepts_by_first_keyword from sheerka
|
||||
:return:
|
||||
"""
|
||||
sheerka = context.sheerka
|
||||
res = sheerka.cache_manager.copy(sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY) if use_sheerka else {}
|
||||
for concept in concepts:
|
||||
keywords = BaseNodeParser.get_first_tokens(sheerka, concept)
|
||||
|
||||
if keywords is None:
|
||||
# no first token found for a concept ?
|
||||
return sheerka.ret(sheerka.name, False, concept)
|
||||
|
||||
for keyword in keywords:
|
||||
res.setdefault(keyword, []).append(concept.id)
|
||||
|
||||
return sheerka.ret("BaseNodeParser", True, res)
|
||||
|
||||
@staticmethod
|
||||
def resolve_concepts_by_first_keyword(context, concepts_by_first_keyword):
|
||||
sheerka = context.sheerka
|
||||
|
||||
def _make_unique(elements):
|
||||
keys = {}
|
||||
for e in elements:
|
||||
keys[e] = 1
|
||||
return list(keys.keys())
|
||||
|
||||
def _resolve_concepts(concept_str):
|
||||
resolved = []
|
||||
to_resolve = []
|
||||
concept = sheerka.get_by_id(core.utils.unstr_concept(concept_str)[1])
|
||||
if sheerka.isaset(context, concept):
|
||||
concepts = sheerka.get_set_elements(context, concept)
|
||||
else:
|
||||
concepts = [concept]
|
||||
|
||||
for concept in concepts:
|
||||
BaseNodeParser.ensure_bnf(context, concept) # need to make sure that it cannot fail
|
||||
keywords = BaseNodeParser.get_first_tokens(sheerka, concept)
|
||||
for keyword in keywords:
|
||||
(to_resolve if keyword.startswith("c:|") else resolved).append(keyword)
|
||||
|
||||
for concept_to_resolve_str in to_resolve:
|
||||
resolved += _resolve_concepts(concept_to_resolve_str)
|
||||
|
||||
return resolved
|
||||
|
||||
res = {}
|
||||
for k, v in concepts_by_first_keyword.items():
|
||||
if k.startswith("c:|"):
|
||||
resolved_keywords = _resolve_concepts(k)
|
||||
for resolved in resolved_keywords:
|
||||
res.setdefault(resolved, []).extend(v)
|
||||
else:
|
||||
res.setdefault(k, []).extend(v)
|
||||
|
||||
# 'uniquify' the lists
|
||||
for k, v in res.items():
|
||||
res[k] = _make_unique(v)
|
||||
|
||||
return sheerka.ret("BaseNodeParser", True, res)
|
||||
|
||||
@staticmethod
|
||||
def resolve_sya_associativity_and_precedence(context, sya):
|
||||
pass
|
||||
|
||||
@staticmethod
|
||||
def get_first_tokens(sheerka, concept):
|
||||
"""
|
||||
|
||||
:param sheerka:
|
||||
:param concept:
|
||||
:return:
|
||||
"""
|
||||
if concept.bnf:
|
||||
from parsers.BnfNodeParser import BnfNodeFirstTokenVisitor
|
||||
bnf_visitor = BnfNodeFirstTokenVisitor(sheerka)
|
||||
bnf_visitor.visit(concept.bnf)
|
||||
return bnf_visitor.first_tokens
|
||||
else:
|
||||
keywords = concept.key.split()
|
||||
for keyword in keywords:
|
||||
if keyword.startswith(VARIABLE_PREFIX):
|
||||
continue
|
||||
|
||||
return [keyword]
|
||||
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def ensure_bnf(context, concept, parser_name="BaseNodeParser"):
|
||||
if concept.metadata.definition_type == DEFINITION_TYPE_BNF and not concept.bnf:
|
||||
from parsers.BnfParser import BnfParser
|
||||
regex_parser = BnfParser()
|
||||
desc = f"Resolving BNF {concept.metadata.definition}"
|
||||
with context.push(parser_name, obj=concept, desc=desc) as sub_context:
|
||||
sub_context.add_inputs(parser_input=concept.metadata.definition)
|
||||
bnf_parsing_ret_val = regex_parser.parse(sub_context, concept.metadata.definition)
|
||||
sub_context.add_values(return_values=bnf_parsing_ret_val)
|
||||
|
||||
if not bnf_parsing_ret_val.status:
|
||||
raise Exception(bnf_parsing_ret_val.value)
|
||||
|
||||
concept.bnf = bnf_parsing_ret_val.body.body
|
||||
if concept.id:
|
||||
context.sheerka.get_by_id(concept.id).bnf = concept.bnf # update bnf in cache
|
||||
|
||||
Reference in New Issue
Block a user