Refactored Caching, Refactored BnfNodeParser, Introduced Sphinx

This commit is contained in:
2020-05-12 17:21:10 +02:00
parent 7d3a490bc5
commit 6e343ba996
110 changed files with 13865 additions and 7540 deletions
+195 -65
View File
@@ -2,8 +2,9 @@ from collections import namedtuple
from dataclasses import dataclass
from enum import Enum
import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.concept import VARIABLE_PREFIX, Concept
from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF, ConceptParts
from core.sheerka.ExecutionContext import ExecutionContext
from core.tokenizer import TokenKind, LexerError, Token
from parsers.BaseParser import Node, BaseParser, ErrorNode
@@ -187,6 +188,9 @@ class SourceCodeNode(LexerNode):
self.end == other.end and \
self.source == other.source
if isinstance(other, SCN):
return other == self
if not isinstance(other, SourceCodeNode):
return False
@@ -352,6 +356,51 @@ class HelperWithPos:
return self
class SCN(HelperWithPos):
    """
    SourceCodeNode tester class.

    Matches a SourceCodeNode with fewer constraints:
    SCN == SourceCodeNode when `source` matches; `start` and `end` are only
    checked when they are not None.
    """

    def __init__(self, source, start=None, end=None):
        super().__init__(start, end)
        self.source = source

    def __eq__(self, other):
        # Fast path: same object.
        if id(self) == id(other):
            return True
        if isinstance(other, SourceCodeNode):
            if self.source != other.source:
                return False
            # None means "don't care" for positions.
            if self.start is not None and self.start != other.start:
                return False
            if self.end is not None and self.end != other.end:
                return False
            return True
        # BUGFIX: was `isinstance(other, CN)` (copied from the CN tester),
        # which made two distinct SCN instances always compare unequal.
        if not isinstance(other, SCN):
            return False
        return self.source == other.source and \
            self.start == other.start and \
            self.end == other.end

    def __hash__(self):
        return hash((self.source, self.start, self.end))

    def __repr__(self):
        txt = f"SCN(source='{self.source}'"
        if self.start is not None:
            txt += f", start={self.start}"
        if self.end is not None:
            txt += f", end={self.end}"
        return txt + ")"
class CN(HelperWithPos):
"""
ConceptNode tester class
@@ -390,6 +439,8 @@ class CN(HelperWithPos):
return False
if self.end is not None and self.end != other.end:
return False
if self.source is not None and self.source != other.source:
return False
return True
if not isinstance(other, CN):
@@ -425,9 +476,10 @@ class CNC(CN):
CNC == ConceptNode if CNC.compiled == ConceptNode.concept.compiled
"""
def __init__(self, concept_key, start=None, end=None, source=None, **kwargs):
def __init__(self, concept_key, start=None, end=None, source=None, exclude_body=False, **kwargs):
super().__init__(concept_key, start, end, source)
self.compiled = kwargs
self.exclude_body = exclude_body
def __eq__(self, other):
if id(self) == id(other):
@@ -442,7 +494,13 @@ class CNC(CN):
return False
if self.end is not None and self.end != other.end:
return False
return self.compiled == other.concept.compiled # assert instead of return to help debugging tests
if self.source is not None and self.source != other.source:
return False
if self.exclude_body:
to_compare = {k: v for k, v in other.concept.compiled.items() if k != ConceptParts.BODY}
else:
to_compare = other.concept.compiled
return self.compiled == to_compare
if not isinstance(other, CNC):
return False
@@ -518,11 +576,10 @@ class BaseNodeParser(BaseParser):
super().__init__(name, priority)
if 'sheerka' in kwargs:
sheerka = kwargs.get("sheerka")
self.init_from_sheerka(sheerka)
self.concepts_by_first_keyword = sheerka.resolved_concepts_by_first_keyword
else:
self.concepts_by_first_keyword = None
self.sya_definitions = None
self.token = None
self.pos = -1
@@ -532,17 +589,16 @@ class BaseNodeParser(BaseParser):
self.text = None
self.sheerka = None
def init_from_sheerka(self, sheerka):
def init_from_concepts(self, context, concepts, **kwargs):
"""
Use the definitons from Sheerka to initialize
:param sheerka:
Initialize the parser with a list of concepts
For unit tests convenience
:param context
:param concepts
:return:
"""
self.concepts_by_first_keyword = sheerka.concepts_by_first_keyword
if sheerka.sya_definitions:
self.sya_definitions = {}
for k, v in sheerka.sya_definitions.items():
self.sya_definitions[k] = (v[0], SyaAssociativity(v[1]))
concepts_by_first_keyword = self.get_concepts_by_first_keyword(context, concepts).body
self.concepts_by_first_keyword = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword).body
def reset_parser(self, context, text):
self.context = context
@@ -582,82 +638,43 @@ class BaseNodeParser(BaseParser):
return self.token.type != TokenKind.EOF
def initialize(self, context, concepts, sya_definitions=None, use_sheerka=False):
"""
To quickly find a concept, we store them in an hash where the key is the first token of the concept
example :
Concept("foo a").def_prop("a"), "foo" is a token, "a" is a variable
So the key to use will be "foo"
Concept("a foo").def_prop("a") -> first token is "foo"
Concept("Hello my dear a").def_prop("a") -> first token is "Hello"
Note that under the same key, there will be multiple entry
a B-Tree may be a better implementation in the future
We also store sya_definition which a is tuple (concept_precedence:int, concept_associativity:SyaAssociativity)
:param context:
:param concepts: list[Concept]
:param sya_definitions: hash[concept_id, tuple(precedence:int, associativity:SyaAssociativity)]
:param use_sheerka: first init with the definitions from Sheerka
:return:
"""
self.context = context
self.sheerka = context.sheerka
if use_sheerka:
self.init_from_sheerka(self.sheerka)
if sya_definitions:
if self.sya_definitions:
self.sya_definitions.update(sya_definitions)
else:
self.sya_definitions = sya_definitions
if self.concepts_by_first_keyword is None:
self.concepts_by_first_keyword = {}
for concept in concepts:
keywords = concept.key.split()
for keyword in keywords:
if keyword.startswith(VARIABLE_PREFIX):
continue
self.concepts_by_first_keyword.setdefault(keyword, []).append(concept.id)
break
return self.sheerka.ret(self.name, True, self.concepts_by_first_keyword)
def get_concepts(self, token, to_keep, to_map=None):
def get_concepts(self, token, to_keep, custom=None, to_map=None, strip_quotes=False):
"""
Tries to find if there are concepts that match the value of the token
:param token:
:param to_keep: predicate to tell if the concept is eligible
:param custom: lambda name -> List[Concepts] that gives extra concepts, according to the name
:param to_map:
:param strip_quotes: Remove quotes from strings
:return:
"""
if token.type == TokenKind.WHITESPACE:
return None
if token.type == TokenKind.STRING:
name = token.value[1:-1]
name = token.value[1:-1] if strip_quotes else token.value
elif token.type == TokenKind.KEYWORD:
name = token.value.value
else:
name = token.value
custom_concepts = custom(name) if custom else []
result = []
if name in self.concepts_by_first_keyword:
for concept_id in self.concepts_by_first_keyword[name]:
for concept_id in self.concepts_by_first_keyword.get(name):
concept = self.sheerka.get_by_id(concept_id)
if not to_keep(concept):
continue
concept = to_map(concept) if to_map else concept
concept = to_map(self, concept) if to_map else concept
result.append(concept)
return result
return result + custom_concepts
return None
return custom_concepts if custom else None
@staticmethod
def get_token_value(token):
@@ -667,3 +684,116 @@ class BaseNodeParser(BaseParser):
return token.value.value
else:
return token.value
@staticmethod
def get_concepts_by_first_keyword(context, concepts, use_sheerka=False):
    """
    Create the map describing the first token expected by a concept.

    Each concept contributes its possible first tokens as keys; the value is
    the list of concept ids sharing that first token.
    :param context:
    :param concepts: list of concepts to parse
    :param use_sheerka: if True, seed the map from sheerka's cached entry
    :return: sheerka return value wrapping the keyword -> [concept.id] map,
             or a failure wrapping the offending concept
    """
    sheerka = context.sheerka
    res = sheerka.cache_manager.copy(sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY) if use_sheerka else {}
    for concept in concepts:
        keywords = BaseNodeParser.get_first_tokens(sheerka, concept)
        if keywords is None:
            # No first token found for a concept: report failure.
            # CONSISTENCY FIX: use the same reporter name as the success
            # path below (was `sheerka.name`).
            return sheerka.ret("BaseNodeParser", False, concept)
        for keyword in keywords:
            res.setdefault(keyword, []).append(concept.id)
    return sheerka.ret("BaseNodeParser", True, res)
@staticmethod
def resolve_concepts_by_first_keyword(context, concepts_by_first_keyword):
    """
    Expand concept-reference keys ("c:|...") of a first-keyword map into the
    plain first tokens of the referenced concepts (recursively), then
    de-duplicate every value list.

    :param context:
    :param concepts_by_first_keyword: map keyword -> list of concept ids
    :return: sheerka return value wrapping the resolved map
    """
    sheerka = context.sheerka

    def _resolve_concepts(concept_str):
        # Recursively turn one "c:|..." reference into plain first tokens.
        resolved = []
        to_resolve = []
        concept = sheerka.get_by_id(core.utils.unstr_concept(concept_str)[1])
        if sheerka.isaset(context, concept):
            concepts = sheerka.get_set_elements(context, concept)
        else:
            concepts = [concept]
        for concept in concepts:
            BaseNodeParser.ensure_bnf(context, concept)  # need to make sure that it cannot fail
            keywords = BaseNodeParser.get_first_tokens(sheerka, concept)
            for keyword in keywords:
                (to_resolve if keyword.startswith("c:|") else resolved).append(keyword)
        for concept_to_resolve_str in to_resolve:
            # NOTE(review): no cycle detection — mutually-referencing
            # concepts would recurse forever; confirm references are acyclic.
            resolved += _resolve_concepts(concept_to_resolve_str)
        return resolved

    res = {}
    for k, v in concepts_by_first_keyword.items():
        if k.startswith("c:|"):
            for resolved in _resolve_concepts(k):
                res.setdefault(resolved, []).extend(v)
        else:
            res.setdefault(k, []).extend(v)
    # De-duplicate each list while preserving insertion order
    # (idiomatic replacement for the previous hand-rolled _make_unique).
    for k, v in res.items():
        res[k] = list(dict.fromkeys(v))
    return sheerka.ret("BaseNodeParser", True, res)
@staticmethod
def resolve_sya_associativity_and_precedence(context, sya):
    """
    Placeholder — not implemented yet.

    :param context:
    :param sya:
    :return: None
    """
    pass
@staticmethod
def get_first_tokens(sheerka, concept):
    """
    Collect the possible first token(s) of a concept.

    BNF-defined concepts delegate to BnfNodeFirstTokenVisitor; otherwise the
    first non-variable keyword of the concept key is used.
    :param sheerka:
    :param concept:
    :return: list of first tokens, or None when the key holds only variables
    """
    if concept.bnf:
        from parsers.BnfNodeParser import BnfNodeFirstTokenVisitor
        visitor = BnfNodeFirstTokenVisitor(sheerka)
        visitor.visit(concept.bnf)
        return visitor.first_tokens
    # First keyword that is not a variable placeholder, if any.
    first = next(
        (kw for kw in concept.key.split() if not kw.startswith(VARIABLE_PREFIX)),
        None,
    )
    return None if first is None else [first]
@staticmethod
def ensure_bnf(context, concept, parser_name="BaseNodeParser"):
    """
    Lazily parse a concept's BNF definition when it is declared as BNF but
    its parsed tree is missing; also refreshes the cached concept's bnf.

    :param context:
    :param concept:
    :param parser_name: name pushed onto the context for tracing
    :raises Exception: when the BNF definition cannot be parsed
    """
    # Nothing to do unless the concept is BNF-defined and still unparsed.
    if concept.metadata.definition_type != DEFINITION_TYPE_BNF or concept.bnf:
        return
    from parsers.BnfParser import BnfParser
    bnf_parser = BnfParser()
    desc = f"Resolving BNF {concept.metadata.definition}"
    with context.push(parser_name, obj=concept, desc=desc) as sub_context:
        sub_context.add_inputs(parser_input=concept.metadata.definition)
        ret_val = bnf_parser.parse(sub_context, concept.metadata.definition)
        sub_context.add_values(return_values=ret_val)
        if not ret_val.status:
            raise Exception(ret_val.value)
        concept.bnf = ret_val.body.body
        if concept.id:
            # Keep the cached copy in sync with the freshly parsed tree.
            context.sheerka.get_by_id(concept.id).bnf = concept.bnf