We can now use concept sets in BNF definitions
@@ -1,4 +1,7 @@
from dataclasses import dataclass
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from core.tokenizer import TokenKind, Keywords
from core.sheerka_logger import get_logger
import logging
@@ -83,6 +86,17 @@ class BaseParser:
        value = context.return_value_to_str(r)
        context.log(self.log, f" Recognized '{value}'", self.name)

    def get_return_value_body(self, sheerka, source, tree, try_parse):
        if len(self.error_sink) == 1 and isinstance(self.error_sink[0], Concept):
            return self.error_sink[0]

        return sheerka.new(
            BuiltinConcepts.PARSER_RESULT,
            parser=self,
            source=source,
            body=self.error_sink if self.has_error else tree,
            try_parsed=try_parse)

    @staticmethod
    def get_text_from_tokens(tokens, custom_switcher=None):
        if tokens is None:
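Note: a minimal, self-contained sketch (toy names, not this project's API) of the packaging rule the new get_return_value_body helper centralises: a lone Concept sitting in the error sink is returned unchanged, otherwise the parse tree (or the accumulated errors) is wrapped in a result record.

    from dataclasses import dataclass

    @dataclass
    class ToyResult:                 # stand-in for the PARSER_RESULT concept
        source: str
        body: object
        try_parsed: object

    def package(error_sink, tree, source, is_concept=lambda x: isinstance(x, dict)):
        # a single Concept-like error is propagated as-is
        if len(error_sink) == 1 and is_concept(error_sink[0]):
            return error_sink[0]
        # otherwise wrap either the errors or the tree
        return ToyResult(source=source,
                         body=error_sink if error_sink else tree,
                         try_parsed=tree)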
@@ -127,15 +127,12 @@ class BnfParser(BaseParser):
        except LexerError as e:
            self.add_error(e, False)

        value = self.get_return_value_body(context.sheerka, self.source, tree, tree)

        ret = self.sheerka.ret(
            self.name,
            not self.has_error,
            self.sheerka.new(
                BuiltinConcepts.PARSER_RESULT,
                parser=self,
                source=self.source,
                body=self.error_sink if self.has_error else tree,
                try_parsed=tree))
            value)

        return ret
@@ -231,15 +228,26 @@ class BnfParser(BaseParser):

        if token.type == TokenKind.IDENTIFIER:
            self.next_token()
            return ConceptExpression(token.value)
            # concept = self.sheerka.get(str(token.value))
            # if hasattr(concept, "__iter__") or self.sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
            #     self.add_error(CannotResolveConceptNode(str(token.value)))
            #     self.next_token()
            #     return None
            # else:
            #     self.next_token()
            #     return concept

            concept_name = str(token.value)

            # we are trying to match against a concept which is still under construction!
            # (for example, in a recursive BNF definition)
            if self.context.obj and hasattr(self.context.obj, "name"):
                if concept_name == str(self.context.obj.name):
                    return ConceptExpression(concept_name)

            concept = self.context.get_concept(concept_name)
            if not self.sheerka.is_known(concept):
                self.add_error(concept)
                return None
            elif hasattr(concept, "__iter__"):
                self.add_error(
                    self.sheerka.new(BuiltinConcepts.CANNOT_RESOLVE_CONCEPT,
                                     body=("key", concept_name)))
                return None
            else:
                return concept

        ret = StrMatch(core.utils.strip_quotes(token.value))
        self.next_token()
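Note: a small, self-contained illustration (assumed names, not the project's API) of the resolution order the new identifier handling follows: a name equal to the concept currently under construction is kept as a by-name reference (which is what makes recursive BNF definitions possible), an unknown name is reported as an error, and anything else resolves to the stored concept.

    def resolve(name, under_construction, known_concepts):
        # self-reference to the concept being defined: defer, match by name later
        if under_construction is not None and name == under_construction:
            return ("by_name", name)
        if name not in known_concepts:
            return ("error", f"cannot resolve concept '{name}'")
        return ("concept", known_concepts[name])

    print(resolve("expr", "expr", {}))                      # ('by_name', 'expr')
    print(resolve("number", "expr", {"number": object()}))  # ('concept', <object ...>)
    print(resolve("missing", "expr", {}))                   # ('error', "cannot resolve concept 'missing'")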
@@ -269,8 +269,12 @@ class ConceptExpression(ParsingExpression):
        if isinstance(self.concept, Concept):
            return self.concept.name == other.concept.name

        # when it's only the name of the concept
        return self.concept == other.concept

    def __hash__(self):
        return hash((self.concept, self.rule_name))

    @staticmethod
    def get_parsing_expression_from_name(name):
        tokens = Tokenizer(name)
@@ -302,6 +306,29 @@ class ConceptExpression(ParsingExpression):
        return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node])


class ConceptGroupExpression(ConceptExpression):
    def _parse(self, parser):
        to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept
        if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
            return None

        self.concept = to_match  # Memoize

        if to_match not in parser.concepts_grammars:
            concepts_in_group = parser.sheerka.get_set_elements(self.concept)
            nodes = [ConceptExpression(c, rule_name=c.name) for c in concepts_in_group]
            expr = OrderedChoice(nodes)
            expr.nodes = nodes
            node = expr.parse(parser)
        else:
            node = parser.concepts_grammars[to_match].parse(parser)

        if node is None:
            return None

        return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node])


class Sequence(ParsingExpression):
    """
    Will match a sequence of parser expressions in the exact order they are defined.
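Note: a toy, self-contained sketch (made-up names, not the project's API) of the behaviour ConceptGroupExpression adds: a concept set used in a BNF rule matches when any of its member concepts matches, tried in order like an ordered choice.

    def parse_group(members, parsers, text):
        # try each member concept's parser in order; the first success wins
        for name in members:
            result = parsers[name](text)
            if result is not None:
                return name, result
        return None

    # hypothetical set "literal" = {number, string}
    parsers = {
        "number": lambda s: s if s.isdigit() else None,
        "string": lambda s: s if len(s) >= 2 and s[0] == '"' and s[-1] == '"' else None,
    }
    print(parse_group(["number", "string"], parsers, "42"))    # ('number', '42')
    print(parse_group(["number", "string"], parsers, '"hi"'))  # ('string', '"hi"')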
@@ -667,7 +694,10 @@ class ConceptLexerParser(BaseParser):
        # A copy must be created
        def inner_get_model(expression):
            if isinstance(expression, Concept):
                ret = ConceptExpression(expression, rule_name=expression.name)
                if self.sheerka.isagroup(expression):
                    ret = ConceptGroupExpression(expression, rule_name=expression.name)
                else:
                    ret = ConceptExpression(expression, rule_name=expression.name)
                concepts_to_resolve.add(expression)
            elif isinstance(expression, ConceptExpression):
                if expression.rule_name is None or expression.rule_name == "":
@@ -210,12 +210,13 @@ class DefaultParser(BaseParser):
        if self.has_error and isinstance(self.error_sink[0], CannotHandleErrorNode):
            body = self.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=self.error_sink)
        else:
            body = self.sheerka.new(
                BuiltinConcepts.PARSER_RESULT,
                parser=self,
                source=text,
                body=self.error_sink if self.has_error else tree,
                try_parsed=tree)
            body = self.get_return_value_body(context.sheerka, text, tree, tree)
            # body = self.sheerka.new(
            #     BuiltinConcepts.PARSER_RESULT,
            #     parser=self,
            #     source=text,
            #     body=self.error_sink if self.has_error else tree,
            #     try_parsed=tree)

        ret = self.sheerka.ret(
            self.name,
@@ -261,7 +262,7 @@ class DefaultParser(BaseParser):
        concept_found.name = self.get_concept_name(first_token, tokens_found_by_parts)

        # get the definition
        concept_found.definition = self.get_concept_definition(tokens_found_by_parts)
        concept_found.definition = self.get_concept_definition(concept_found, tokens_found_by_parts)

        # get the ASTs for the remaining parts
        asts_found_by_parts = self.get_concept_parts(tokens_found_by_parts)
@@ -359,7 +360,7 @@ class DefaultParser(BaseParser):
        name_node = NameNode(name_tokens[name_first_token_index:])  # skip the first token
        return name_node

    def get_concept_definition(self, tokens_found_by_parts):
    def get_concept_definition(self, current_concept_def, tokens_found_by_parts):
        if tokens_found_by_parts[Keywords.FROM] is None:
            return NotInitializedNode()
@@ -373,7 +374,7 @@ class DefaultParser(BaseParser):
            return NotInitializedNode()

        regex_parser = BnfParser()
        with self.context.push(self.name) as sub_context:
        with self.context.push(self.name, obj=current_concept_def) as sub_context:
            parsing_result = regex_parser.parse(sub_context, tokens)
            sub_context.add_values(return_values=parsing_result)