Fixed #9 : I can parse 'def concept'
@@ -0,0 +1,212 @@
from dataclasses import dataclass

from common.global_symbols import NotFound
from core.error import ErrorObj
from parsers.parser_utils import SimpleParser, UnexpectedEof, UnexpectedToken
from parsers.peg_parser import ConceptExpression, OneOrMore, Optional, OrderedChoice, RegExMatch, Sequence, StrMatch, \
    VariableExpression, ZeroOrMore
from parsers.tokenizer import TokenKind, Tokenizer


@dataclass
class UnknownConcept(ErrorObj):
    concept_id: str
    concept_name: str

    def get_error_msg(self) -> str:
        return f"Cannot find concept defined by id='{self.concept_id}' and/or name '{self.concept_name}'"


class BnfDefinitionParser(SimpleParser):
    """
    Parser used to transform a literal grammar definition into a ParsingExpression.

    Example:
        a | b c -> OrderedChoice(a, Sequence(b, c))

    '|' (pipe) is used for OrderedChoice
    ' ' (space) is used for Sequence
    '?' (question mark) is used for Optional
    '*' (star) is used for ZeroOrMore
    '+' (plus) is used for OneOrMore
    """

    def __init__(self, context, text, concept_name=None):
        super().__init__(text, skip_whitespace_default_behaviour=False)
        self.context = context
        self.concept_name = concept_name  # name of the concept currently being constructed

        self.nb_open_par = 0
        self.next_token(skip_whitespace=True)

    def maybe_sequence(self, first, second):
        # True when the current token is `second`, or is `first` immediately
        # followed by `second` (e.g. '|' possibly preceded by whitespace)
        return self.token.type == second or \
               self.token.type == first and self.check_next_token().type == second

    def parse(self):
        tree = self._parse_choice()

        if self.token.type != TokenKind.EOF:
            self.add_error(UnexpectedToken(self.token, TokenKind.EOF))

        return None if self.error_sink else tree

    def _parse_choice(self):
        """
        a | b | c

        <choice> := <sequence> ( '|' <sequence> )*

        :return: the parsed expression, an OrderedChoice when the input
            contains several alternatives
        """
        sequence = self._parse_sequence()

        self.eat_whitespace()

        if self.token.type != TokenKind.VBAR:
            return sequence

        elements = [sequence]
        while True:
            # maybe eat the vertical bar
            self.eat_whitespace()
            if self.token is None or self.token.type != TokenKind.VBAR:
                break
            self.next_token(skip_whitespace=True)

            sequence = self._parse_sequence()
            elements.append(sequence)

        return self._eat_rule_name_if_needed(OrderedChoice(*elements))

    def _parse_sequence(self):
        """
        a b c

        :return: the parsed expression, a Sequence when several elements
            follow one another
        """
        expr_and_modifier = self._parse_modifier()
        if self.token.type == TokenKind.EOF or \
                self.token.type == TokenKind.EQUALS or \
                self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
                self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR):
            return expr_and_modifier

        elements = [expr_and_modifier]
        while True:
            if self.token is None or \
                    self.token.type == TokenKind.EOF or \
                    self.token.type == TokenKind.EQUALS or \
                    self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
                    self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR):
                break
            self.eat_whitespace()

            sequence = self._parse_modifier()
            elements.append(sequence)

        return self._eat_rule_name_if_needed(Sequence(*elements))

    def _parse_modifier(self):
        """
        a? | a* | a+

        :return: the parsed expression, wrapped in Optional, ZeroOrMore or
            OneOrMore when a modifier follows it
        """
        expression = self._parse_expression()

        if self.token.type == TokenKind.QMARK:
            self.next_token()
            return self._eat_rule_name_if_needed(Optional(expression))

        if self.token.type == TokenKind.STAR:
            self.next_token()
            return self._eat_rule_name_if_needed(ZeroOrMore(expression))

        if self.token.type == TokenKind.PLUS:
            self.next_token()
            return self._eat_rule_name_if_needed(OneOrMore(expression))

        return expression

    def _parse_expression(self):
        if self.token.type == TokenKind.EOF:
            self.add_error(UnexpectedEof("lpar | concept | ident | string | regex", self.token))

        if self.token.type == TokenKind.LPAR:
            self.nb_open_par += 1
            self.next_token()
            expr = self._parse_choice()
            if self.token.type == TokenKind.RPAR:
                self.nb_open_par -= 1
                self.next_token()
                return self._eat_rule_name_if_needed(expr)
            else:
                self.add_error(UnexpectedToken(self.token, TokenKind.RPAR))
                return expr

        if self.token.type == TokenKind.CONCEPT:
            concept_name, concept_id = self.token.value
            metadata = self.context.sheerka.get_by_id(concept_id) if concept_id \
                else self.context.sheerka.get_by_name(concept_name)

            if metadata is NotFound:
                self.add_error(UnknownConcept(concept_id, concept_name))
                self.next_token()
                return None

            expr = ConceptExpression(metadata.id, rule_name=metadata.name)
            self.next_token()
            return self._eat_rule_name_if_needed(expr)

        if self.token.type == TokenKind.IDENTIFIER:

            concept_name = self.token.str_value

            if concept_name == self.concept_name:
                # recursive construction, the concept id is not known yet
                expr = ConceptExpression(None, rule_name=concept_name)

            elif (metadata := self.context.sheerka.get_by_name(concept_name)) is NotFound:
                # unknown concept, it's a variable definition
                expr = VariableExpression(concept_name)

            else:
                expr = ConceptExpression(metadata.id, rule_name=concept_name)

            self.next_token()
            return self._eat_rule_name_if_needed(expr)

        if self.token.type == TokenKind.STRING:
            # a quoted string is re-tokenized so each word is matched separately
            tokens = list(Tokenizer(self.token.strip_quote, yield_eof=False))
            if len(tokens) == 1:
                self.next_token()
                return self._eat_rule_name_if_needed(StrMatch(tokens[0].str_value))
            else:
                elements = [StrMatch(t.str_value, skip_whitespace=False) for t in tokens]
                elements[-1].skip_whitespace = True
                ret = Sequence(*elements)
                self.next_token()
                return self._eat_rule_name_if_needed(ret)

        if self.token.type == TokenKind.REGEX:
            ret = RegExMatch(self.token.strip_quote)
            self.next_token()
            return self._eat_rule_name_if_needed(ret)

        # fallback: any other token is matched literally
        ret = StrMatch(self.token.strip_quote)
        self.next_token()
        return self._eat_rule_name_if_needed(ret)

    def _eat_rule_name_if_needed(self, expression):

        if self.token.type == TokenKind.EQUALS:
            self.next_token()  # eat equals

            if self.token.type != TokenKind.IDENTIFIER:
                return self.add_error(UnexpectedToken(self.token, TokenKind.IDENTIFIER))

            expression.rule_name = self.token.value
            self.next_token()

        return expression
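

# Schematic illustration of this commit's goal, parsing 'def concept' (assumes
# a configured context and that 'name' is not a registered concept):
#
#   BnfDefinitionParser(context, "'def concept' name").parse()
#       -> Sequence(Sequence(StrMatch('def'), StrMatch('concept')),
#                   VariableExpression('name'))
#
# The quoted string is re-tokenized word by word, so 'def concept' matches the
# two tokens 'def' and 'concept' instead of one literal containing a space.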