I can define and eval BNF definitions

This commit is contained in:
2019-12-17 21:19:44 +01:00
parent c668cc46d2
commit 88cd3162be
25 changed files with 1099 additions and 569 deletions
+6
View File
@@ -27,6 +27,12 @@ class ErrorNode(Node):
pass
@dataclass()
class UnexpectedTokenErrorNode(ErrorNode):
message: str
expected_tokens: list
class BaseParser:
PREFIX = "Parsers:"
+227
View File
@@ -0,0 +1,227 @@
from dataclasses import dataclass
import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.sheerka import ExecutionContext
from core.tokenizer import Tokenizer, Token, TokenKind
from parsers.BaseParser import BaseParser, ErrorNode, UnexpectedTokenErrorNode
from parsers.ConceptLexerParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptMatch, StrMatch
@dataclass()
class UnexpectedEndOfFileError(ErrorNode):
    """Error node recorded when the token stream ends before an expression is complete."""
    pass
class BnfParser:
    """
    Parser used to transform a literal BNF-style definition into a ParsingExpression.

    example :
        a | b, c -> Sequence(OrderedChoice(a, b), c)

    '|' (pipe) is used for OrderedChoice
    ',' (comma) is used for Sequence
    '?' (question mark) is used for Optional
    '*' (star) is used for ZeroOrMore
    '+' (plus) is used for OneOrMore
    """

    def __init__(self):
        self.has_error = False
        self.error_sink = []  # collected ErrorNode instances
        # NOTE(review): the name still says "RegexParser" although the class is
        # BnfParser — it looks copied from the old RegexParser. The string is
        # surfaced through sheerka.ret(), so confirm callers before renaming.
        self.name = BaseParser.PREFIX + "RegexParser"
        self.lexer_iter = None     # token iterator, set by reset_parser()
        self._current = None       # current token
        self.after_current = None  # one-token lookahead buffer
        self.nb_open_par = 0       # current '(' nesting depth
        self.context = None        # ExecutionContext, set by reset_parser()
        self.source = ""           # concatenation of every token value consumed
        self.sheerka = None

    def __eq__(self, other):
        # Any two BnfParser instances compare equal: the parser carries no
        # identity-relevant state between runs.
        # NOTE(review): defining __eq__ without __hash__ makes instances
        # unhashable — confirm this is intended.
        if not isinstance(other, BnfParser):
            return False
        return True

    def reset_parser(self, context, text):
        """Prepare the parser for a new run over `text` (a str or a token iterable)."""
        self.context = context
        self.sheerka = context.sheerka
        # Accept either raw text (tokenized here) or an already-tokenized stream.
        self.lexer_iter = iter(Tokenizer(text.strip())) if isinstance(text, str) else iter(text)
        self._current = None
        self.after_current = None
        self.nb_open_par = 0
        self.next_token()
        self.eat_white_space()

    def add_error(self, error, next_token=True):
        """Record `error` in the sink, optionally advancing past the offending token.

        Returns the error so callers can use it as a result value.
        """
        self.has_error = True
        self.error_sink.append(error)
        if next_token:
            self.next_token()
        return error

    def get_token(self) -> Token:
        # Current token; may be None after eat_white_space() exhausts the stream.
        return self._current

    def next_token(self, skip_whitespace=False):
        """Advance to the next token, draining the lookahead buffer first."""
        # Stay on EOF once it has been reached.
        if self._current and self._current.type == TokenKind.EOF:
            return
        try:
            self._current = self.after_current or next(self.lexer_iter)
            self.source += str(self._current.value)
            self.after_current = None
            if skip_whitespace:
                while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
                    self._current = next(self.lexer_iter)
                    self.source += str(self._current.value)
        except StopIteration:
            # Synthesize an EOF token when the stream is exhausted.
            self._current = Token(TokenKind.EOF, "", -1, -1, -1)

    def next_after(self):
        """Peek at the token after the current one without consuming it."""
        if self.after_current is not None:
            return self.after_current
        try:
            self.after_current = next(self.lexer_iter)
            # self.source += str(self.after_current.value)
            return self.after_current
        except StopIteration:
            self.after_current = Token(TokenKind.EOF, "", -1, -1, -1)
            return self.after_current

    def eat_white_space(self):
        """Skip WHITESPACE/NEWLINE tokens, starting from the lookahead if buffered."""
        if self.after_current is not None:
            self._current = self.after_current
            self.source += str(self._current.value)
            self.after_current = None
        try:
            while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
                self._current = next(self.lexer_iter)
                self.source += str(self._current.value)
        except StopIteration:
            # NOTE(review): unlike next_token(), exhaustion here leaves _current
            # as None instead of an EOF token; callers therefore check both
            # `token is None` and EOF — confirm this asymmetry is intentional.
            self._current = None

    def maybe_sequence(self, first, second):
        """True if the current token is `second`, or `first` immediately followed by `second`."""
        token = self.get_token()
        # Precedence note: parses as `second or (first and lookahead == second)`.
        return token.type == second or token.type == first and self.next_after().type == second

    def parse(self, context: ExecutionContext, text):
        """Parse `text` into an expression tree, wrapped in a PARSER_RESULT return value."""
        self.reset_parser(context, text)
        tree = self.parse_choice()
        ret = self.sheerka.ret(
            self.name,
            not self.has_error,
            self.sheerka.new(
                BuiltinConcepts.PARSER_RESULT,
                parser=self,
                source=self.source,
                # On error the body carries the error sink; the partially
                # parsed tree remains available through try_parsed.
                body=self.error_sink if self.has_error else tree,
                try_parsed=tree))
        return ret

    def parse_choice(self):
        """choice := sequence ('|' sequence)*  — lowest-precedence rule."""
        sequence = self.parse_sequence()
        self.eat_white_space()
        token = self.get_token()
        # No following '|': a single alternative is returned unwrapped.
        if token is None or token.type == TokenKind.EOF or token.type != TokenKind.VBAR:
            return sequence
        elements = [sequence]
        while True:
            # maybe eat the vertical bar
            self.eat_white_space()
            token = self.get_token()
            if token is None or token.type == TokenKind.EOF or token.type != TokenKind.VBAR:
                break
            self.next_token(skip_whitespace=True)
            sequence = self.parse_sequence()
            elements.append(sequence)
        return OrderedChoice(*elements)

    def parse_sequence(self):
        """sequence := expr_and_modifier (',' expr_and_modifier)*"""
        expr_and_modifier = self.parse_expression_and_modifier()
        token = self.get_token()
        # Stop at EOF, before a '|' (handled by parse_choice), or before a
        # closing ')' when nested inside parentheses.
        if token is None or token.type == TokenKind.EOF or \
                self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
                self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR):
            return expr_and_modifier
        elements = [expr_and_modifier]
        while True:
            # maybe eat the comma
            token = self.get_token()
            if token is None or token.type == TokenKind.EOF or \
                    self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
                    self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR):
                break
            self.eat_white_space()
            sequence = self.parse_expression_and_modifier()
            elements.append(sequence)
        return Sequence(*elements)

    def parse_expression_and_modifier(self):
        """expr_and_modifier := expression ('?' | '*' | '+')?"""
        expression = self.parse_expression()
        token = self.get_token()
        if token.type == TokenKind.QMARK:
            self.next_token()
            return Optional(expression)
        if token.type == TokenKind.STAR:
            self.next_token()
            return ZeroOrMore(expression)
        if token.type == TokenKind.PLUS:
            self.next_token()
            return OneOrMore(expression)
        return expression

    def parse_expression(self):
        """expression := '(' choice ')' | IDENTIFIER | literal-string"""
        token = self.get_token()
        if token.type == TokenKind.EOF:
            # NOTE(review): after recording this error, control falls through to
            # the StrMatch branch below with the empty EOF token — confirm an
            # early return is not needed here.
            self.add_error(UnexpectedEndOfFileError(), False)
        if token.type == TokenKind.LPAR:
            self.nb_open_par += 1
            self.next_token()
            expression = self.parse_choice()
            token = self.get_token()
            if token.type == TokenKind.RPAR:
                self.nb_open_par -= 1
                self.next_token()
                return expression
            else:
                # Unbalanced parenthesis: report it, then return what was parsed.
                self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token.type}'", [TokenKind.RPAR]))
            return expression
        if token.type == TokenKind.IDENTIFIER:
            # An identifier is kept as an unresolved concept reference;
            # resolution happens later (see the commented-out code below).
            self.next_token()
            return ConceptMatch(token.value)
        # concept = self.sheerka.get(str(token.value))
        # if hasattr(concept, "__iter__") or self.sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
        #     self.add_error(CannotResolveConceptNode(str(token.value)))
        #     self.next_token()
        #     return None
        # else:
        #     self.next_token()
        #     return concept
        # Anything else is treated as a quoted literal string match.
        ret = StrMatch(core.utils.strip_quotes(token.value))
        self.next_token()
        return ret
+149 -332
View File
@@ -1,5 +1,5 @@
#####################################################################################################
# This part of code is highly inspired by the arpeggio project (https://github.com/textX/Arpeggio)
# This implementation of the parser is highly inspired by the arpeggio project (https://github.com/textX/Arpeggio)
# I don't directly use the project, but it helped me figure out
# what to do.
# Dejanović I., Milosavljević G., Vaderna R.:
@@ -10,7 +10,6 @@ from dataclasses import field, dataclass
from collections import defaultdict
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from core.sheerka import ExecutionContext
from core.tokenizer import TokenKind, Tokenizer, Token
from parsers.BaseParser import BaseParser, Node, ErrorNode
import core.utils
@@ -40,6 +39,18 @@ def flatten(iterable):
class LexerNode(Node):
start: int
end: int
tokens: list = None
source: str = None
def __post_init__(self):
if self.source is None:
self.source = BaseParser.get_text_from_tokens(self.tokens)
def __eq__(self, other):
if not isinstance(other, LexerNode):
return False
return self.start == other.start and self.end == other.end
class ConceptNode(LexerNode):
@@ -48,17 +59,24 @@ class ConceptNode(LexerNode):
It represents a recognized concept
"""
def __init__(self, concept, start, end, tokens=None, source=None, children=None):
super().__init__(start, end)
def __init__(self, concept, start, end, tokens=None, source=None, underlying=None):
super().__init__(start, end, tokens, source)
self.concept = concept
self.tokens = tokens
self.source = source
self.children = children
self.underlying = underlying
if self.source is None:
self.source = BaseParser.get_text_from_tokens(self.tokens)
def __eq__(self, other):
if isinstance(other, tuple):
if len(other) == 2:
return self.concept == other[0] and self.source == other[1]
else:
return self.concept == other[0] and \
self.start == other[1] and \
self.end == other[2] and \
self.source == other[3]
if not super().__eq__(other):
return False
@@ -66,10 +84,14 @@ class ConceptNode(LexerNode):
return False
return self.concept == other.concept and \
self.source == other.source
self.source == other.source and \
self.underlying == other.underlying
def __hash__(self):
return hash((self.concept, self.start, self.end, self.source))
return hash((self.concept, self.start, self.end, self.source, self.underlying))
def __repr__(self):
return f"ConceptNode(concept='{self.concept}', start={self.start}, end={self.end}, source='{self.source}')"
class NonTerminalNode(LexerNode):
@@ -77,8 +99,8 @@ class NonTerminalNode(LexerNode):
Returned by the ConceptLexerParser
"""
def __init__(self, parsing_expression, start, end, children=None):
super().__init__(start, end)
def __init__(self, parsing_expression, start, end, tokens, children=None):
super().__init__(start, end, tokens)
self.parsing_expression = parsing_expression
self.children = children
@@ -90,6 +112,21 @@ class NonTerminalNode(LexerNode):
sub_names = ""
return name + sub_names
def __eq__(self, other):
if not super().__eq__(other):
return False
if not isinstance(other, NonTerminalNode):
return False
return self.parsing_expression == other.parsing_expression and \
self.start == other.start and \
self.end == other.end and \
self.children == other.children
def __hash__(self):
return hash((self.parsing_expression, self.start, self.end, self.children))
class TerminalNode(LexerNode):
"""
@@ -97,7 +134,7 @@ class TerminalNode(LexerNode):
"""
def __init__(self, parsing_expression, start, end, value):
super().__init__(start, end)
super().__init__(start, end, source=value)
self.parsing_expression = parsing_expression
self.value = value
@@ -105,23 +142,27 @@ class TerminalNode(LexerNode):
name = self.parsing_expression.rule_name or ""
return name + f"'{self.value}'"
def __eq__(self, other):
if not super().__eq__(other):
return False
if not isinstance(other, TerminalNode):
return False
return self.parsing_expression == other.parsing_expression and \
self.start == other.start and \
self.end == other.end and \
self.value == other.value
def __hash__(self):
return hash((self.parsing_expression, self.start, self.end, self.value))
@dataclass()
class GrammarErrorNode(ErrorNode):
message: str
@dataclass()
class UnexpectedTokenErrorNode(ErrorNode):
message: str
expected_tokens: list
@dataclass()
class UnexpectedEndOfFileError(ErrorNode):
pass
@dataclass()
class UnknownConceptNode(ErrorNode):
concept_key: str
@@ -175,7 +216,7 @@ class Sequence(ParsingExpression):
children.append(node)
end_pos = node.end
return NonTerminalNode(self, init_pos, end_pos, children)
return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children or [])
def __repr__(self):
to_str = ", ".join(repr(n) for n in self.elements)
@@ -194,7 +235,7 @@ class OrderedChoice(ParsingExpression):
for e in self.nodes:
node = e.parse(parser)
if node:
return NonTerminalNode(self, init_pos, node.end, [node])
return NonTerminalNode(self, init_pos, node.end, parser.tokens[init_pos: node.end + 1], [node])
parser.seek(init_pos) # backtrack
@@ -214,13 +255,18 @@ class Optional(ParsingExpression):
def _parse(self, parser):
init_pos = parser.pos
selected_node = NonTerminalNode(self, parser.pos, -1, [])
selected_node = NonTerminalNode(self, parser.pos, -1, [], []) # means that nothing is found
for e in self.nodes:
node = e.parse(parser)
if node:
if node.end > selected_node.end:
selected_node = node
selected_node = NonTerminalNode(
self,
node.start,
node.end,
parser.tokens[node.start: node.end + 1],
[node])
parser.seek(init_pos) # backtrack
@@ -327,12 +373,12 @@ class ConceptMatch(Match):
When the grammar is created, it is replaced by the actual concept
"""
def __init__(self, concept_name):
super(Match, self).__init__()
self.concept_name = concept_name
def __init__(self, concept, rule_name=""):
super(Match, self).__init__(rule_name=rule_name)
self.concept = concept
def __repr__(self):
return f"{self.concept_name}"
return f"{self.concept}"
def __eq__(self, other):
if not super().__eq__(other):
@@ -341,32 +387,37 @@ class ConceptMatch(Match):
if not isinstance(other, ConceptMatch):
return False
return self.concept_name == other.concept_name
class CrossRef:
"""
During the creation of the model,
Creates reference to a concept, as it may not be resolved yet
"""
def __init__(self, concept):
self.concept = concept
def __repr__(self):
return f"ref({self.concept.key})"
def __eq__(self, other):
if not isinstance(other, CrossRef):
return False
if isinstance(self.concept, Concept):
return self.concept.name == other.concept.name
return self.concept == other.concept
def _parse(self, parser):
to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept
if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
return None
if to_match not in parser.concepts_grammars:
return None
self.concept = to_match # Memoize
node = parser.concepts_grammars[to_match].parse(parser)
if node is None:
return None
return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node])
class ConceptLexerParser(BaseParser):
def __init__(self, concepts_dict=None):
def __init__(self, **kwargs):
super().__init__("ConceptLexer")
self.concepts_dict = concepts_dict or {} # dict of concept, grammar
if 'grammars' in kwargs:
self.concepts_grammars = kwargs.get("grammars")
elif 'sheerka' in kwargs:
self.concepts_grammars = kwargs.get("sheerka").concepts_grammars
else:
self.concepts_grammars = {}
self.ignore_case = True
self.token = None
@@ -430,24 +481,23 @@ class ConceptLexerParser(BaseParser):
self.pos -= 1
self.token = self.tokens[self.pos]
def initialize(self, context, grammars):
def initialize(self, context, concepts_definitions):
"""
Adds a bunch of concepts, and how they can be recognized
:param context: execution context
:param grammars: dictionary of concept, concept_definition
:param concepts_definitions: dictionary of concept, concept_definition
:return:
"""
self.context = context
self.sheerka = context.sheerka
nodes_to_resolve = []
concepts_to_resolve = set()
# ## Gets the grammars
for concept, concept_def in grammars.items():
for concept, concept_def in concepts_definitions.items():
concept.init_key() # make sure that the key is initialized
grammar = self.get_model(concept, concept_def, nodes_to_resolve, concepts_to_resolve)
self.concepts_dict[concept] = grammar
grammar = self.get_model(concept_def, concepts_to_resolve)
self.concepts_grammars[concept] = grammar
if self.has_error:
return self.sheerka.ret(self.name, False, self.error_sink)
@@ -456,73 +506,68 @@ class ConceptLexerParser(BaseParser):
concepts_to_remove = self.detect_infinite_recursion(concepts_to_resolve)
for concept in concepts_to_remove:
concepts_to_resolve.remove(concept)
del self.concepts_dict[concept]
# ## Resolves cross references and remove grammar with unresolved references
self.resolve_cross_references(concepts_to_resolve, nodes_to_resolve)
del self.concepts_grammars[concept]
if self.has_error:
return self.sheerka.ret(self.name, False, self.error_sink)
else:
return self.sheerka.ret(self.name, True, self.concepts_dict)
return self.sheerka.ret(self.name, True, self.concepts_grammars)
def get_model(self, concept, concept_def, nodes_to_resolve, concepts_to_resolve):
def get_concept(concept_name):
if concept_name in self.context.concepts_cache:
return self.context.concepts_cache[concept_name]
return self.sheerka.get(concept_name)
def get_concept(self, concept_name):
if concept_name in self.context.concepts_cache:
return self.context.concepts_cache[concept_name]
return self.sheerka.get(concept_name)
def get_model(self, concept_def, concepts_to_resolve):
# TODO
# inner_get_model must not modify the initial ParsingExpression
# A copy must be created
def inner_get_model(expression):
if isinstance(expression, Concept):
ret = CrossRef(expression)
concepts_to_resolve.add(concept)
nodes_to_resolve.append(ret)
ret = ConceptMatch(expression, rule_name=expression.name)
concepts_to_resolve.add(expression)
elif isinstance(expression, ConceptMatch):
if expression.rule_name is None or expression.rule_name == "":
expression.rule_name = expression.concept.name if isinstance(expression.concept, Concept) \
else expression.concept
concepts_to_resolve.add(expression.concept)
ret = expression
elif isinstance(expression, str):
ret = StrMatch(expression, ignore_case=self.ignore_case)
elif isinstance(expression, StrMatch):
ret = expression
if ret.ignore_case is None:
ret.ignore_case = self.ignore_case
elif isinstance(expression, ConceptMatch):
to_match = get_concept(expression.concept_name)
if hasattr(to_match, "__iter__"):
ret = self.add_error(TooManyConceptNode(expression.concept_name), False)
elif self.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
ret = self.add_error(UnknownConceptNode(expression.concept_name), False)
else:
ret = CrossRef(to_match)
concepts_to_resolve.add(concept)
nodes_to_resolve.append(ret)
elif isinstance(expression, Sequence) or \
isinstance(expression, OrderedChoice) or \
isinstance(expression, Optional):
ret = expression
ret.nodes.extend([inner_get_model(e) for e in ret.elements])
if any((isinstance(x, CrossRef) for x in ret.nodes)):
concepts_to_resolve.add(concept)
nodes_to_resolve.append(ret)
else:
ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False)
return ret
model = inner_get_model(concept_def)
if isinstance(model, CrossRef):
concepts_to_resolve.add(concept)
model.rule_name = concept.key
return model
def detect_infinite_recursion(self, concepts_to_resolve):
# infinite recursion matcher
def _is_infinite_recursion(ref_concept, node):
if isinstance(node, CrossRef):
if isinstance(node, ConceptMatch):
if node.concept == ref_concept:
return True
return _is_infinite_recursion(ref_concept, self.concepts_dict[node.concept])
if isinstance(node.concept, str):
to_match = self.get_concept(node.concept)
if self.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
return False
else:
to_match = node.concept
return _is_infinite_recursion(ref_concept, self.concepts_grammars[to_match])
if isinstance(node, OrderedChoice):
return _is_infinite_recursion(ref_concept, node.nodes[0])
@@ -537,32 +582,16 @@ class ConceptLexerParser(BaseParser):
removed_concepts = []
for e in concepts_to_resolve:
to_resolve = self.concepts_dict[e]
if isinstance(e, str):
e = self.get_concept(e)
if self.sheerka.isinstance(e, BuiltinConcepts.UNKNOWN_CONCEPT):
continue
to_resolve = self.concepts_grammars[e]
if _is_infinite_recursion(e, to_resolve):
removed_concepts.append(e)
return removed_concepts
# Cross-ref resolving
def resolve_cross_references(self, concepts_to_resolve, nodes_to_resolve):
repeat = True
while repeat:
repeat = False
for e in concepts_to_resolve:
to_resolve = self.concepts_dict[e]
if isinstance(to_resolve, CrossRef):
repeat = True
self.concepts_dict[e] = self.concepts_dict[to_resolve.concept]
for e in nodes_to_resolve:
if not isinstance(e, ParsingExpression):
continue # cases when a concept directly references another concept
for i, node in enumerate(e.nodes):
if isinstance(node, CrossRef):
if node.concept in self.concepts_dict:
e.nodes[i] = self.concepts_dict[node.concept]
def parse(self, context, text):
if text == "":
return context.sheerka.ret(
@@ -591,13 +620,17 @@ class ConceptLexerParser(BaseParser):
while True:
init_pos = self.pos
res = []
for concept, grammar in self.concepts_dict.items():
for concept, grammar in self.concepts_grammars.items():
self.seek(init_pos)
node = grammar.parse(self)
node = grammar.parse(self) # a node is TerminalNode or NonTerminalNode
if node is not None:
concept_node = ConceptNode(concept, node.start, node.end, self.tokens[node.start: node.end + 1])
if hasattr(node, "children"):
concept_node.children = node.children
concept_node = ConceptNode(
concept,
node.start,
node.end,
self.tokens[node.start: node.end + 1],
None,
node)
res.append(concept_node)
if len(res) == 0: # not recognized
@@ -606,9 +639,7 @@ class ConceptLexerParser(BaseParser):
self.add_error(self.sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=not_recognized))
break
res = self.get_bests(res) # only keep the concept that eat the more tokens
for r in res:
r.children = flatten(r.children)
res = self.get_bests(res)  # only keep the concepts that eat the most tokens
concepts_found = core.utils.product(concepts_found, res)
# loop
@@ -659,220 +690,6 @@ class ConceptLexerParser(BaseParser):
return by_end_pos[max(by_end_pos)]
class RegexParser:
"""
Parser used to transform litteral into ParsingExpression
example :
a | b, c -> Sequence(OrderedChoice(a, b) ,c)
'|' (pipe) is used for OrderedChoice
',' (comma) is used for Sequence
'?' (question mark) is used for Optional
'*' (star) is used for ZeroOrMore
'+' (plus) is used for OneOrMore
"""
def __init__(self):
self.has_error = False
self.error_sink = []
self.name = BaseParser.PREFIX + "RegexParser"
self.lexer_iter = None
self._current = None
self.after_current = None
self.nb_open_par = 0
self.context = None
self.source = ""
self.sheerka = None
def __eq__(self, other):
if not isinstance(other, RegexParser):
return False
return True
def reset_parser(self, context, text):
self.context = context
self.sheerka = context.sheerka
self.lexer_iter = iter(Tokenizer(text.strip())) if isinstance(text, str) else iter(text)
self._current = None
self.after_current = None
self.nb_open_par = 0
self.next_token()
self.eat_white_space()
def add_error(self, error, next_token=True):
self.has_error = True
self.error_sink.append(error)
if next_token:
self.next_token()
return error
def get_token(self) -> Token:
return self._current
def next_token(self, skip_whitespace=False):
if self._current and self._current.type == TokenKind.EOF:
return
try:
self._current = self.after_current or next(self.lexer_iter)
self.source += str(self._current.value)
self.after_current = None
if skip_whitespace:
while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
self._current = next(self.lexer_iter)
self.source += str(self._current.value)
except StopIteration:
self._current = Token(TokenKind.EOF, "", -1, -1, -1)
def next_after(self):
if self.after_current is not None:
return self.after_current
try:
self.after_current = next(self.lexer_iter)
# self.source += str(self.after_current.value)
return self.after_current
except StopIteration:
self.after_current = Token(TokenKind.EOF, "", -1, -1, -1)
return self.after_current
def eat_white_space(self):
if self.after_current is not None:
self._current = self.after_current
self.source += str(self._current.value)
self.after_current = None
try:
while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
self._current = next(self.lexer_iter)
self.source += str(self._current.value)
except StopIteration:
self._current = None
def maybe_sequence(self, first, second):
token = self.get_token()
return token.type == second or token.type == first and self.next_after().type == second
def parse(self, context: ExecutionContext, text):
self.reset_parser(context, text)
tree = self.parse_choice()
ret = self.sheerka.ret(
self.name,
not self.has_error,
self.sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=self.source,
body=self.error_sink if self.has_error else tree,
try_parsed=tree))
return ret
def parse_choice(self):
sequence = self.parse_sequence()
self.eat_white_space()
token = self.get_token()
if token is None or token.type == TokenKind.EOF or token.type != TokenKind.VBAR:
return sequence
elements = [sequence]
while True:
# maybe eat the vertical bar
self.eat_white_space()
token = self.get_token()
if token is None or token.type == TokenKind.EOF or token.type != TokenKind.VBAR:
break
self.next_token(skip_whitespace=True)
sequence = self.parse_sequence()
elements.append(sequence)
return OrderedChoice(*elements)
def parse_sequence(self):
expr_and_modifier = self.parse_expression_and_modifier()
token = self.get_token()
if token is None or token.type == TokenKind.EOF or \
self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR):
return expr_and_modifier
elements = [expr_and_modifier]
while True:
# maybe eat the comma
token = self.get_token()
if token is None or token.type == TokenKind.EOF or \
self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR):
break
self.eat_white_space()
sequence = self.parse_expression_and_modifier()
elements.append(sequence)
return Sequence(*elements)
def parse_expression_and_modifier(self):
expression = self.parse_expression()
token = self.get_token()
if token.type == TokenKind.QMARK:
self.next_token()
return Optional(expression)
if token.type == TokenKind.STAR:
self.next_token()
return ZeroOrMore(expression)
if token.type == TokenKind.PLUS:
self.next_token()
return OneOrMore(expression)
return expression
def parse_expression(self):
token = self.get_token()
if token.type == TokenKind.EOF:
self.add_error(UnexpectedEndOfFileError(), False)
if token.type == TokenKind.LPAR:
self.nb_open_par += 1
self.next_token()
expression = self.parse_choice()
token = self.get_token()
if token.type == TokenKind.RPAR:
self.nb_open_par -= 1
self.next_token()
return expression
else:
self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token.type}'", [TokenKind.RPAR]))
return expression
if token.type == TokenKind.IDENTIFIER:
self.next_token()
return ConceptMatch(token.value)
# concept = self.sheerka.get(str(token.value))
# if hasattr(concept, "__iter__") or self.sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
# self.add_error(CannotResolveConceptNode(str(token.value)))
# self.next_token()
# return None
# else:
# self.next_token()
# return concept
ret = StrMatch(core.utils.strip_quotes(token.value))
self.next_token()
return ret
class ParsingExpressionVisitor:
"""
visit ParsingExpression
+4 -4
View File
@@ -2,12 +2,12 @@ from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, ParserRes
from core.concept import ConceptParts
import core.builtin_helpers
import core.utils
from parsers.BaseParser import BaseParser, Node, NopNode, ErrorNode, NotInitializedNode
from parsers.BaseParser import BaseParser, Node, ErrorNode, NotInitializedNode
from core.tokenizer import Tokenizer, TokenKind, Token, Keywords
from dataclasses import dataclass, field
import logging
from parsers.ConceptLexerParser import RegexParser
from parsers.BnfParser import BnfParser
log = logging.getLogger(__name__)
@@ -206,7 +206,7 @@ class DefaultParser(BaseParser):
Parse sheerka specific grammar (like def concept)
"""
def __init__(self):
def __init__(self, **kwargs):
BaseParser.__init__(self, "DefaultParser")
self.lexer_iter = None
self._current = None
@@ -427,7 +427,7 @@ class DefaultParser(BaseParser):
self.add_error(SyntaxErrorNode([definition_tokens[1]], "Empty declaration"), False)
return NotInitializedNode()
regex_parser = RegexParser()
regex_parser = BnfParser()
new_context = self.context.push(self.name)
parsing_result = regex_parser.parse(new_context, tokens)
if not parsing_result.status:
+1 -1
View File
@@ -10,7 +10,7 @@ class EmptyStringParser(BaseParser):
To parse empty or blank strings
"""
def __init__(self):
def __init__(self, **kwargs):
BaseParser.__init__(self, "NullParser")
def parse(self, context, text):
+2 -2
View File
@@ -1,7 +1,7 @@
from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts
from parsers.BaseParser import BaseParser
from core.tokenizer import Tokenizer, Keywords, TokenKind
from core.concept import Concept, VARIABLE_PREFIX
from core.concept import VARIABLE_PREFIX
import logging
log = logging.getLogger(__name__)
@@ -14,7 +14,7 @@ class ExactConceptParser(BaseParser):
MAX_WORDS_SIZE = 10
def __init__(self):
def __init__(self, **kwargs):
BaseParser.__init__(self, "ConceptParser")
def parse(self, context, text):
+2 -3
View File
@@ -2,7 +2,6 @@ from core.builtin_concepts import BuiltinConcepts
from parsers.BaseParser import BaseParser, Node, ErrorNode
from dataclasses import dataclass
import ast
import copy
import logging
log = logging.getLogger(__name__)
@@ -57,10 +56,10 @@ class PythonParser(BaseParser):
Parse Python scripts
"""
def __init__(self, source="<undef>"):
def __init__(self, **kwargs):
BaseParser.__init__(self, "PythonParser")
self.source = source
self.source = kwargs.get("source", "<undef>")
def parse(self, context, text):
text = text if isinstance(text, str) else self.get_text_from_tokens(text)