We can now use concept sets in BNF definitions

This commit is contained in:
2020-01-19 21:48:43 +01:00
parent a7b239c167
commit 821614a6c4
16 changed files with 643 additions and 93 deletions
+14
View File
@@ -1,4 +1,7 @@
from dataclasses import dataclass
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from core.tokenizer import TokenKind, Keywords
from core.sheerka_logger import get_logger
import logging
@@ -83,6 +86,17 @@ class BaseParser:
value = context.return_value_to_str(r)
context.log(self.log, f" Recognized '{value}'", self.name)
def get_return_value_body(self, sheerka, source, tree, try_parse):
    """Build the value a parse run should hand back to the caller.

    If the error sink contains exactly one Concept, that concept is
    returned unchanged.  Otherwise a fresh PARSER_RESULT concept is
    created whose body is the collected errors when the parse failed,
    or the parse tree when it succeeded.
    """
    sink = self.error_sink
    if len(sink) == 1 and isinstance(sink[0], Concept):
        return sink[0]
    # Failed parses carry the error sink as their body; successful ones
    # carry the parse tree.
    result_body = sink if self.has_error else tree
    return sheerka.new(
        BuiltinConcepts.PARSER_RESULT,
        parser=self,
        source=source,
        body=result_body,
        try_parsed=try_parse)
@staticmethod
def get_text_from_tokens(tokens, custom_switcher=None):
if tokens is None:
+23 -15
View File
@@ -127,15 +127,12 @@ class BnfParser(BaseParser):
except LexerError as e:
self.add_error(e, False)
value = self.get_return_value_body(context.sheerka, self.source, tree, tree)
ret = self.sheerka.ret(
self.name,
not self.has_error,
self.sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=self.source,
body=self.error_sink if self.has_error else tree,
try_parsed=tree))
value)
return ret
@@ -231,15 +228,26 @@ class BnfParser(BaseParser):
if token.type == TokenKind.IDENTIFIER:
self.next_token()
return ConceptExpression(token.value)
# concept = self.sheerka.get(str(token.value))
# if hasattr(concept, "__iter__") or self.sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
# self.add_error(CannotResolveConceptNode(str(token.value)))
# self.next_token()
# return None
# else:
# self.next_token()
# return concept
concept_name = str(token.value)
# we are trying to match against a concept which is still under construction
# (for example, in a recursive BNF definition)
if self.context.obj and hasattr(self.context.obj, "name"):
if concept_name == str(self.context.obj.name):
return ConceptExpression(concept_name)
concept = self.context.get_concept(concept_name)
if not self.sheerka.is_known(concept):
self.add_error(concept)
return None
elif hasattr(concept, "__iter__"):
self.add_error(
self.sheerka.new(BuiltinConcepts.CANNOT_RESOLVE_CONCEPT,
body=("key", concept_name)))
return None
else:
return concept
ret = StrMatch(core.utils.strip_quotes(token.value))
self.next_token()
+31 -1
View File
@@ -269,8 +269,12 @@ class ConceptExpression(ParsingExpression):
if isinstance(self.concept, Concept):
return self.concept.name == other.concept.name
# when it's only the name of the concept
return self.concept == other.concept
def __hash__(self):
return hash((self.concept, self.rule_name))
@staticmethod
def get_parsing_expression_from_name(name):
tokens = Tokenizer(name)
@@ -302,6 +306,29 @@ class ConceptExpression(ParsingExpression):
return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node])
class ConceptGroupExpression(ConceptExpression):
    """A ConceptExpression that matches any member of a concept set
    (group), trying each member concept as an ordered choice unless the
    parser already has a dedicated grammar for the group."""

    def _parse(self, parser):
        # Resolve the concept lazily when only its name is known.
        group = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept
        if parser.sheerka.isinstance(group, BuiltinConcepts.UNKNOWN_CONCEPT):
            return None
        # Memoize the resolved concept so later parses skip the lookup.
        self.concept = group
        if group in parser.concepts_grammars:
            # A dedicated grammar exists for this group: delegate to it.
            child = parser.concepts_grammars[group].parse(parser)
        else:
            # No grammar yet: try every member of the set as an ordered choice.
            members = parser.sheerka.get_set_elements(self.concept)
            choices = [ConceptExpression(c, rule_name=c.name) for c in members]
            choice_expr = OrderedChoice(choices)
            choice_expr.nodes = choices
            child = choice_expr.parse(parser)
        if child is None:
            return None
        return NonTerminalNode(self, child.start, child.end, parser.tokens[child.start: child.end + 1], [child])
class Sequence(ParsingExpression):
"""
Will match sequence of parser expressions in exact order they are defined.
@@ -667,7 +694,10 @@ class ConceptLexerParser(BaseParser):
# A copy must be created
def inner_get_model(expression):
if isinstance(expression, Concept):
ret = ConceptExpression(expression, rule_name=expression.name)
if self.sheerka.isagroup(expression):
ret = ConceptGroupExpression(expression, rule_name=expression.name)
else:
ret = ConceptExpression(expression, rule_name=expression.name)
concepts_to_resolve.add(expression)
elif isinstance(expression, ConceptExpression):
if expression.rule_name is None or expression.rule_name == "":
+10 -9
View File
@@ -210,12 +210,13 @@ class DefaultParser(BaseParser):
if self.has_error and isinstance(self.error_sink[0], CannotHandleErrorNode):
body = self.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=self.error_sink)
else:
body = self.sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=text,
body=self.error_sink if self.has_error else tree,
try_parsed=tree)
body = self.get_return_value_body(context.sheerka, text, tree, tree)
# body = self.sheerka.new(
# BuiltinConcepts.PARSER_RESULT,
# parser=self,
# source=text,
# body=self.error_sink if self.has_error else tree,
# try_parsed=tree)
ret = self.sheerka.ret(
self.name,
@@ -261,7 +262,7 @@ class DefaultParser(BaseParser):
concept_found.name = self.get_concept_name(first_token, tokens_found_by_parts)
# get the definition
concept_found.definition = self.get_concept_definition(tokens_found_by_parts)
concept_found.definition = self.get_concept_definition(concept_found, tokens_found_by_parts)
# get the ASTs for the remaining parts
asts_found_by_parts = self.get_concept_parts(tokens_found_by_parts)
@@ -359,7 +360,7 @@ class DefaultParser(BaseParser):
name_node = NameNode(name_tokens[name_first_token_index:]) # skip the first token
return name_node
def get_concept_definition(self, tokens_found_by_parts):
def get_concept_definition(self, current_concept_def, tokens_found_by_parts):
if tokens_found_by_parts[Keywords.FROM] is None:
return NotInitializedNode()
@@ -373,7 +374,7 @@ class DefaultParser(BaseParser):
return NotInitializedNode()
regex_parser = BnfParser()
with self.context.push(self.name) as sub_context:
with self.context.push(self.name, obj=current_concept_def) as sub_context:
parsing_result = regex_parser.parse(sub_context, tokens)
sub_context.add_values(return_values=parsing_result)