Added basic implementation for where
This commit is contained in:
@@ -2,8 +2,9 @@ from dataclasses import dataclass
|
||||
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.concept import Concept
|
||||
from core.tokenizer import TokenKind, Keywords
|
||||
from core.tokenizer import TokenKind, Keywords, Token
|
||||
from core.sheerka_logger import get_logger
|
||||
import core.utils
|
||||
import logging
|
||||
|
||||
|
||||
@@ -35,8 +36,34 @@ class ErrorNode(Node):
|
||||
@dataclass()
|
||||
class UnexpectedTokenErrorNode(ErrorNode):
|
||||
message: str
|
||||
token: Token
|
||||
expected_tokens: list
|
||||
|
||||
def __eq__(self, other):
|
||||
if id(other) == id(self):
|
||||
return True
|
||||
|
||||
if not isinstance(other, UnexpectedTokenErrorNode):
|
||||
return False
|
||||
|
||||
if self.message != other.message:
|
||||
return False
|
||||
|
||||
if self.token.type != other.token.type or self.token.value != other.token.value:
|
||||
return False
|
||||
|
||||
if len(self.expected_tokens) != len(other.expected_tokens):
|
||||
return False
|
||||
|
||||
for i, t in enumerate(self.expected_tokens):
|
||||
if t != other.expected_tokens[i]:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def __hash__(self):
|
||||
return hash((self.message, self.token, self.expected_tokens))
|
||||
|
||||
|
||||
class BaseParser:
|
||||
PREFIX = "parsers."
|
||||
@@ -108,7 +135,7 @@ class BaseParser:
|
||||
|
||||
switcher = {
|
||||
TokenKind.KEYWORD: lambda t: Keywords(t.value).value,
|
||||
TokenKind.CONCEPT: lambda t: "c:" + t.value + ":",
|
||||
TokenKind.CONCEPT: lambda t: core.utils.str_concept(t.value),
|
||||
}
|
||||
|
||||
if custom_switcher:
|
||||
|
||||
+39
-24
@@ -5,7 +5,8 @@ from core.builtin_concepts import BuiltinConcepts
|
||||
from core.sheerka.Sheerka import ExecutionContext
|
||||
from core.tokenizer import Tokenizer, Token, TokenKind, LexerError
|
||||
from parsers.BaseParser import BaseParser, ErrorNode, UnexpectedTokenErrorNode
|
||||
from parsers.ConceptLexerParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptExpression, StrMatch
|
||||
from parsers.ConceptLexerParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptExpression, \
|
||||
StrMatch, ConceptGroupExpression
|
||||
|
||||
|
||||
@dataclass()
|
||||
@@ -119,11 +120,11 @@ class BnfParser(BaseParser):
|
||||
tree = None
|
||||
try:
|
||||
self.reset_parser(context, text)
|
||||
tree = self.parser_outer_rule_name()
|
||||
tree = self.parse_choice()
|
||||
|
||||
token = self.get_token()
|
||||
if token and token.type != TokenKind.EOF:
|
||||
self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", []))
|
||||
self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", token, []))
|
||||
except LexerError as e:
|
||||
self.add_error(e, False)
|
||||
|
||||
@@ -136,10 +137,11 @@ class BnfParser(BaseParser):
|
||||
|
||||
return ret
|
||||
|
||||
def parser_outer_rule_name(self):
|
||||
return self.parser_rule_name(self.parse_choice)
|
||||
|
||||
def parse_choice(self):
|
||||
"""
|
||||
a | b | c
|
||||
:return:
|
||||
"""
|
||||
sequence = self.parse_sequence()
|
||||
|
||||
self.eat_white_space()
|
||||
@@ -159,9 +161,13 @@ class BnfParser(BaseParser):
|
||||
sequence = self.parse_sequence()
|
||||
elements.append(sequence)
|
||||
|
||||
return OrderedChoice(*elements)
|
||||
return self.eat_rule_name_if_needed(OrderedChoice(*elements))
|
||||
|
||||
def parse_sequence(self):
|
||||
"""
|
||||
a b c
|
||||
:return:
|
||||
"""
|
||||
expr_and_modifier = self.parse_modifier()
|
||||
token = self.get_token()
|
||||
if token is None or \
|
||||
@@ -185,30 +191,31 @@ class BnfParser(BaseParser):
|
||||
sequence = self.parse_modifier()
|
||||
elements.append(sequence)
|
||||
|
||||
return Sequence(*elements)
|
||||
return self.eat_rule_name_if_needed(Sequence(*elements))
|
||||
|
||||
def parse_modifier(self):
|
||||
expression = self.parser_inner_rule_name()
|
||||
"""
|
||||
a? | a* | a+
|
||||
:return:
|
||||
"""
|
||||
expression = self.parse_expression()
|
||||
|
||||
token = self.get_token()
|
||||
|
||||
if token.type == TokenKind.QMARK:
|
||||
self.next_token()
|
||||
return Optional(expression)
|
||||
return self.eat_rule_name_if_needed(Optional(expression))
|
||||
|
||||
if token.type == TokenKind.STAR:
|
||||
self.next_token()
|
||||
return ZeroOrMore(expression)
|
||||
return self.eat_rule_name_if_needed(ZeroOrMore(expression))
|
||||
|
||||
if token.type == TokenKind.PLUS:
|
||||
self.next_token()
|
||||
return OneOrMore(expression)
|
||||
return self.eat_rule_name_if_needed(OneOrMore(expression))
|
||||
|
||||
return expression
|
||||
|
||||
def parser_inner_rule_name(self):
|
||||
return self.parser_rule_name(self.parse_expression)
|
||||
|
||||
def parse_expression(self):
|
||||
token = self.get_token()
|
||||
if token.type == TokenKind.EOF:
|
||||
@@ -216,15 +223,21 @@ class BnfParser(BaseParser):
|
||||
if token.type == TokenKind.LPAR:
|
||||
self.nb_open_par += 1
|
||||
self.next_token()
|
||||
expression = self.parse_choice()
|
||||
expr = self.parse_choice()
|
||||
token = self.get_token()
|
||||
if token.type == TokenKind.RPAR:
|
||||
self.nb_open_par -= 1
|
||||
self.next_token()
|
||||
return expression
|
||||
return self.eat_rule_name_if_needed(expr)
|
||||
else:
|
||||
self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", [TokenKind.RPAR]))
|
||||
return expression
|
||||
self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", token, [TokenKind.RPAR]))
|
||||
return expr
|
||||
|
||||
if token.type == TokenKind.CONCEPT:
|
||||
self.next_token()
|
||||
concept = self.sheerka.new((token.value[0], token.value[1]))
|
||||
expr = ConceptGroupExpression(concept) if self.sheerka.isaset(concept) else ConceptExpression(concept)
|
||||
return self.eat_rule_name_if_needed(expr)
|
||||
|
||||
if token.type == TokenKind.IDENTIFIER:
|
||||
self.next_token()
|
||||
@@ -247,14 +260,15 @@ class BnfParser(BaseParser):
|
||||
body=("key", concept_name)))
|
||||
return None
|
||||
else:
|
||||
return concept
|
||||
expr = ConceptGroupExpression(concept) if self.sheerka.isaset(concept) else ConceptExpression(concept)
|
||||
expr.rule_name = concept.name
|
||||
return expr
|
||||
|
||||
ret = StrMatch(core.utils.strip_quotes(token.value))
|
||||
self.next_token()
|
||||
return ret
|
||||
return self.eat_rule_name_if_needed(ret)
|
||||
|
||||
def parser_rule_name(self, next_to_parse):
|
||||
expression = next_to_parse()
|
||||
def eat_rule_name_if_needed(self, expression):
|
||||
token = self.get_token()
|
||||
if token is None or token.type != TokenKind.EQUALS:
|
||||
return expression
|
||||
@@ -263,7 +277,8 @@ class BnfParser(BaseParser):
|
||||
token = self.get_token()
|
||||
|
||||
if token is None or token.type != TokenKind.IDENTIFIER:
|
||||
return self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", [TokenKind.IDENTIFIER]))
|
||||
return self.add_error(
|
||||
UnexpectedTokenErrorNode(f"Unexpected token '{token}'", token, [TokenKind.IDENTIFIER]))
|
||||
|
||||
expression.rule_name = token.value
|
||||
self.next_token()
|
||||
|
||||
@@ -243,6 +243,9 @@ class ParsingExpression:
|
||||
def parse(self, parser):
|
||||
return self._parse(parser)
|
||||
|
||||
def add_rule_name_if_needed(self, text):
|
||||
return text + "=" + self.rule_name if self.rule_name else text
|
||||
|
||||
|
||||
class ConceptExpression(ParsingExpression):
|
||||
"""
|
||||
@@ -257,7 +260,7 @@ class ConceptExpression(ParsingExpression):
|
||||
self.concept = concept
|
||||
|
||||
def __repr__(self):
|
||||
return f"{self.concept}"
|
||||
return self.add_rule_name_if_needed(f"{self.concept}")
|
||||
|
||||
def __eq__(self, other):
|
||||
if not super().__eq__(other):
|
||||
@@ -352,7 +355,7 @@ class Sequence(ParsingExpression):
|
||||
|
||||
def __repr__(self):
|
||||
to_str = ", ".join(repr(n) for n in self.elements)
|
||||
return f"({to_str})"
|
||||
return self.add_rule_name_if_needed(f"({to_str})")
|
||||
|
||||
|
||||
class OrderedChoice(ParsingExpression):
|
||||
@@ -375,7 +378,7 @@ class OrderedChoice(ParsingExpression):
|
||||
|
||||
def __repr__(self):
|
||||
to_str = "| ".join(repr(n) for n in self.elements)
|
||||
return f"({to_str})"
|
||||
return self.add_rule_name_if_needed(f"({to_str})")
|
||||
|
||||
|
||||
class Optional(ParsingExpression):
|
||||
@@ -413,7 +416,7 @@ class Optional(ParsingExpression):
|
||||
return f"{self.elements[0]}?"
|
||||
else:
|
||||
to_str = ", ".join(repr(n) for n in self.elements)
|
||||
return f"({to_str})?"
|
||||
return self.add_rule_name_if_needed(f"({to_str})?")
|
||||
|
||||
|
||||
class Repetition(ParsingExpression):
|
||||
@@ -467,7 +470,7 @@ class ZeroOrMore(Repetition):
|
||||
|
||||
def __repr__(self):
|
||||
to_str = ", ".join(repr(n) for n in self.elements)
|
||||
return f"({to_str})*"
|
||||
return self.add_rule_name_if_needed(f"({to_str})*")
|
||||
|
||||
|
||||
class OneOrMore(Repetition):
|
||||
@@ -507,7 +510,7 @@ class OneOrMore(Repetition):
|
||||
|
||||
def __repr__(self):
|
||||
to_str = ", ".join(repr(n) for n in self.elements)
|
||||
return f"({to_str})+"
|
||||
return self.add_rule_name_if_needed(f"({to_str})+")
|
||||
|
||||
|
||||
class UnorderedGroup(Repetition):
|
||||
@@ -541,13 +544,13 @@ class StrMatch(Match):
|
||||
Matches a literal
|
||||
"""
|
||||
|
||||
def __init__(self, to_match, rule_name="", root=False, ignore_case=True):
|
||||
super(Match, self).__init__(rule_name=rule_name, root=root)
|
||||
def __init__(self, to_match, rule_name="", ignore_case=True):
|
||||
super(Match, self).__init__(rule_name=rule_name)
|
||||
self.to_match = to_match
|
||||
self.ignore_case = ignore_case
|
||||
|
||||
def __repr__(self):
|
||||
return f"'{self.to_match}'"
|
||||
return self.add_rule_name_if_needed(f"'{self.to_match}'")
|
||||
|
||||
def __eq__(self, other):
|
||||
if not super().__eq__(other):
|
||||
@@ -699,10 +702,14 @@ class ConceptLexerParser(BaseParser):
|
||||
else:
|
||||
ret = ConceptExpression(expression, rule_name=expression.name)
|
||||
concepts_to_resolve.add(expression)
|
||||
elif isinstance(expression, ConceptExpression):
|
||||
elif isinstance(expression, ConceptExpression): # it includes ConceptGroupExpression
|
||||
if expression.rule_name is None or expression.rule_name == "":
|
||||
expression.rule_name = expression.concept.name if isinstance(expression.concept, Concept) \
|
||||
else expression.concept
|
||||
if isinstance(expression.concept, str):
|
||||
concept = self.get_concept(expression.concept)
|
||||
if self.sheerka.is_known(concept):
|
||||
expression.concept = concept
|
||||
concepts_to_resolve.add(expression.concept)
|
||||
ret = expression
|
||||
elif isinstance(expression, str):
|
||||
@@ -955,6 +962,47 @@ class ConceptLexerParser(BaseParser):
|
||||
|
||||
return concept
|
||||
|
||||
def encode_grammar(self, grammar):
|
||||
"""
|
||||
Transform the grammar into something that can easily be serialized
|
||||
:param grammar:
|
||||
:return:
|
||||
"""
|
||||
|
||||
def _encode(expression):
|
||||
if isinstance(expression, StrMatch):
|
||||
res = f"'{expression.to_match}'"
|
||||
|
||||
elif isinstance(expression, ConceptExpression):
|
||||
res = core.utils.str_concept(expression.concept)
|
||||
|
||||
elif isinstance(expression, Sequence):
|
||||
res = "(" + " ".join(_encode(c) for c in expression.nodes) + ")"
|
||||
|
||||
elif isinstance(expression, OrderedChoice):
|
||||
res = "(" + "|".join(_encode(c) for c in expression.nodes) + ")"
|
||||
|
||||
elif isinstance(expression, Optional):
|
||||
res = _encode(expression.nodes[0]) + "?"
|
||||
|
||||
elif isinstance(expression, ZeroOrMore):
|
||||
res = _encode(expression.nodes[0]) + "*"
|
||||
|
||||
elif isinstance(expression, OneOrMore):
|
||||
res = _encode(expression.nodes[0]) + "+"
|
||||
|
||||
if expression.rule_name:
|
||||
res += "=" + expression.rule_name
|
||||
|
||||
return res
|
||||
|
||||
result = {}
|
||||
for k, v in grammar.items():
|
||||
key = core.utils.str_concept(k)
|
||||
value = _encode(v)
|
||||
result[key] = value
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def get_bests(results):
|
||||
"""
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.tokenizer import Tokenizer, LexerError, TokenKind
|
||||
from parsers.BaseParser import BaseParser, Node, ErrorNode
|
||||
from dataclasses import dataclass, field
|
||||
from dataclasses import dataclass
|
||||
import ast
|
||||
import logging
|
||||
import core.utils
|
||||
|
||||
from parsers.ConceptLexerParser import ConceptNode
|
||||
|
||||
@@ -71,7 +72,7 @@ class PythonParser(BaseParser):
|
||||
tree = None
|
||||
|
||||
python_switcher = {
|
||||
TokenKind.CONCEPT: lambda t: f"__C__USE_CONCEPT__{t.value}__C__"
|
||||
TokenKind.CONCEPT: lambda t: core.utils.encode_concept(t.value, True)
|
||||
}
|
||||
|
||||
try:
|
||||
@@ -136,6 +137,7 @@ class PythonGetNamesVisitor(ast.NodeVisitor):
|
||||
def visit_Name(self, node):
|
||||
self.names.add(node.id)
|
||||
|
||||
|
||||
class LexerNodeParserHelperForPython:
|
||||
"""Helper class to parse mix of concepts and Python"""
|
||||
|
||||
|
||||
Reference in New Issue
Block a user