Added basic implementation for where

This commit is contained in:
2020-02-05 18:47:20 +01:00
parent a5a721094b
commit afc1e22949
35 changed files with 864 additions and 320 deletions
+29 -2
View File
@@ -2,8 +2,9 @@ from dataclasses import dataclass
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from core.tokenizer import TokenKind, Keywords
from core.tokenizer import TokenKind, Keywords, Token
from core.sheerka_logger import get_logger
import core.utils
import logging
@@ -35,8 +36,34 @@ class ErrorNode(Node):
@dataclass()
class UnexpectedTokenErrorNode(ErrorNode):
    """Error node emitted when the parser meets a token it did not expect.

    Attributes:
        message: human-readable description of the error.
        token: the offending Token (compared structurally by .type and .value).
        expected_tokens: the token kinds that would have been accepted here.
    """
    message: str
    token: Token
    expected_tokens: list

    def __eq__(self, other):
        # Identity short-circuit avoids the field-by-field comparison.
        if other is self:
            return True
        if not isinstance(other, UnexpectedTokenErrorNode):
            return False
        if self.message != other.message:
            return False
        # Tokens are compared structurally (kind + lexeme), not by identity.
        if self.token.type != other.token.type or self.token.value != other.token.value:
            return False
        # List equality already checks length and element-wise equality,
        # which is what the previous explicit loop did by hand.
        return self.expected_tokens == other.expected_tokens

    def __hash__(self):
        # BUG FIX: the previous version hashed the raw list (lists are
        # unhashable -> TypeError) and hashed the Token object itself even
        # though __eq__ compares it by .type/.value. Hash exactly the fields
        # __eq__ uses so that equal nodes always hash equally.
        return hash((self.message, self.token.type, self.token.value,
                     tuple(self.expected_tokens)))
class BaseParser:
PREFIX = "parsers."
@@ -108,7 +135,7 @@ class BaseParser:
switcher = {
TokenKind.KEYWORD: lambda t: Keywords(t.value).value,
TokenKind.CONCEPT: lambda t: "c:" + t.value + ":",
TokenKind.CONCEPT: lambda t: core.utils.str_concept(t.value),
}
if custom_switcher:
+39 -24
View File
@@ -5,7 +5,8 @@ from core.builtin_concepts import BuiltinConcepts
from core.sheerka.Sheerka import ExecutionContext
from core.tokenizer import Tokenizer, Token, TokenKind, LexerError
from parsers.BaseParser import BaseParser, ErrorNode, UnexpectedTokenErrorNode
from parsers.ConceptLexerParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptExpression, StrMatch
from parsers.ConceptLexerParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptExpression, \
StrMatch, ConceptGroupExpression
@dataclass()
@@ -119,11 +120,11 @@ class BnfParser(BaseParser):
tree = None
try:
self.reset_parser(context, text)
tree = self.parser_outer_rule_name()
tree = self.parse_choice()
token = self.get_token()
if token and token.type != TokenKind.EOF:
self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", []))
self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", token, []))
except LexerError as e:
self.add_error(e, False)
@@ -136,10 +137,11 @@ class BnfParser(BaseParser):
return ret
def parser_outer_rule_name(self):
return self.parser_rule_name(self.parse_choice)
def parse_choice(self):
"""
a | b | c
:return:
"""
sequence = self.parse_sequence()
self.eat_white_space()
@@ -159,9 +161,13 @@ class BnfParser(BaseParser):
sequence = self.parse_sequence()
elements.append(sequence)
return OrderedChoice(*elements)
return self.eat_rule_name_if_needed(OrderedChoice(*elements))
def parse_sequence(self):
"""
a b c
:return:
"""
expr_and_modifier = self.parse_modifier()
token = self.get_token()
if token is None or \
@@ -185,30 +191,31 @@ class BnfParser(BaseParser):
sequence = self.parse_modifier()
elements.append(sequence)
return Sequence(*elements)
return self.eat_rule_name_if_needed(Sequence(*elements))
def parse_modifier(self):
expression = self.parser_inner_rule_name()
"""
a? | a* | a+
:return:
"""
expression = self.parse_expression()
token = self.get_token()
if token.type == TokenKind.QMARK:
self.next_token()
return Optional(expression)
return self.eat_rule_name_if_needed(Optional(expression))
if token.type == TokenKind.STAR:
self.next_token()
return ZeroOrMore(expression)
return self.eat_rule_name_if_needed(ZeroOrMore(expression))
if token.type == TokenKind.PLUS:
self.next_token()
return OneOrMore(expression)
return self.eat_rule_name_if_needed(OneOrMore(expression))
return expression
def parser_inner_rule_name(self):
return self.parser_rule_name(self.parse_expression)
def parse_expression(self):
token = self.get_token()
if token.type == TokenKind.EOF:
@@ -216,15 +223,21 @@ class BnfParser(BaseParser):
if token.type == TokenKind.LPAR:
self.nb_open_par += 1
self.next_token()
expression = self.parse_choice()
expr = self.parse_choice()
token = self.get_token()
if token.type == TokenKind.RPAR:
self.nb_open_par -= 1
self.next_token()
return expression
return self.eat_rule_name_if_needed(expr)
else:
self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", [TokenKind.RPAR]))
return expression
self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", token, [TokenKind.RPAR]))
return expr
if token.type == TokenKind.CONCEPT:
self.next_token()
concept = self.sheerka.new((token.value[0], token.value[1]))
expr = ConceptGroupExpression(concept) if self.sheerka.isaset(concept) else ConceptExpression(concept)
return self.eat_rule_name_if_needed(expr)
if token.type == TokenKind.IDENTIFIER:
self.next_token()
@@ -247,14 +260,15 @@ class BnfParser(BaseParser):
body=("key", concept_name)))
return None
else:
return concept
expr = ConceptGroupExpression(concept) if self.sheerka.isaset(concept) else ConceptExpression(concept)
expr.rule_name = concept.name
return expr
ret = StrMatch(core.utils.strip_quotes(token.value))
self.next_token()
return ret
return self.eat_rule_name_if_needed(ret)
def parser_rule_name(self, next_to_parse):
expression = next_to_parse()
def eat_rule_name_if_needed(self, expression):
token = self.get_token()
if token is None or token.type != TokenKind.EQUALS:
return expression
@@ -263,7 +277,8 @@ class BnfParser(BaseParser):
token = self.get_token()
if token is None or token.type != TokenKind.IDENTIFIER:
return self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", [TokenKind.IDENTIFIER]))
return self.add_error(
UnexpectedTokenErrorNode(f"Unexpected token '{token}'", token, [TokenKind.IDENTIFIER]))
expression.rule_name = token.value
self.next_token()
+58 -10
View File
@@ -243,6 +243,9 @@ class ParsingExpression:
def parse(self, parser):
return self._parse(parser)
def add_rule_name_if_needed(self, text):
return text + "=" + self.rule_name if self.rule_name else text
class ConceptExpression(ParsingExpression):
"""
@@ -257,7 +260,7 @@ class ConceptExpression(ParsingExpression):
self.concept = concept
def __repr__(self):
return f"{self.concept}"
return self.add_rule_name_if_needed(f"{self.concept}")
def __eq__(self, other):
if not super().__eq__(other):
@@ -352,7 +355,7 @@ class Sequence(ParsingExpression):
def __repr__(self):
to_str = ", ".join(repr(n) for n in self.elements)
return f"({to_str})"
return self.add_rule_name_if_needed(f"({to_str})")
class OrderedChoice(ParsingExpression):
@@ -375,7 +378,7 @@ class OrderedChoice(ParsingExpression):
def __repr__(self):
to_str = "| ".join(repr(n) for n in self.elements)
return f"({to_str})"
return self.add_rule_name_if_needed(f"({to_str})")
class Optional(ParsingExpression):
@@ -413,7 +416,7 @@ class Optional(ParsingExpression):
return f"{self.elements[0]}?"
else:
to_str = ", ".join(repr(n) for n in self.elements)
return f"({to_str})?"
return self.add_rule_name_if_needed(f"({to_str})?")
class Repetition(ParsingExpression):
@@ -467,7 +470,7 @@ class ZeroOrMore(Repetition):
def __repr__(self):
to_str = ", ".join(repr(n) for n in self.elements)
return f"({to_str})*"
return self.add_rule_name_if_needed(f"({to_str})*")
class OneOrMore(Repetition):
@@ -507,7 +510,7 @@ class OneOrMore(Repetition):
def __repr__(self):
to_str = ", ".join(repr(n) for n in self.elements)
return f"({to_str})+"
return self.add_rule_name_if_needed(f"({to_str})+")
class UnorderedGroup(Repetition):
@@ -541,13 +544,13 @@ class StrMatch(Match):
Matches a literal
"""
def __init__(self, to_match, rule_name="", root=False, ignore_case=True):
super(Match, self).__init__(rule_name=rule_name, root=root)
def __init__(self, to_match, rule_name="", ignore_case=True):
super(Match, self).__init__(rule_name=rule_name)
self.to_match = to_match
self.ignore_case = ignore_case
def __repr__(self):
return f"'{self.to_match}'"
return self.add_rule_name_if_needed(f"'{self.to_match}'")
def __eq__(self, other):
if not super().__eq__(other):
@@ -699,10 +702,14 @@ class ConceptLexerParser(BaseParser):
else:
ret = ConceptExpression(expression, rule_name=expression.name)
concepts_to_resolve.add(expression)
elif isinstance(expression, ConceptExpression):
elif isinstance(expression, ConceptExpression): # it includes ConceptGroupExpression
if expression.rule_name is None or expression.rule_name == "":
expression.rule_name = expression.concept.name if isinstance(expression.concept, Concept) \
else expression.concept
if isinstance(expression.concept, str):
concept = self.get_concept(expression.concept)
if self.sheerka.is_known(concept):
expression.concept = concept
concepts_to_resolve.add(expression.concept)
ret = expression
elif isinstance(expression, str):
@@ -955,6 +962,47 @@ class ConceptLexerParser(BaseParser):
return concept
def encode_grammar(self, grammar):
    """
    Transform the grammar into something that can easily be serialized.

    :param grammar: mapping of concept -> parsing expression tree
    :return: dict mapping the encoded concept name (str) to the encoded
             expression (str); each expression is suffixed with
             "=<rule_name>" when it carries a rule name
    :raises TypeError: if the grammar contains an expression type this
             encoder does not know about
    """

    def _encode(expression):
        # NOTE(review): the sibling __repr__ methods iterate
        # `self.elements`; confirm `nodes` is the intended attribute here.
        if isinstance(expression, StrMatch):
            res = f"'{expression.to_match}'"
        elif isinstance(expression, ConceptExpression):
            # Includes ConceptGroupExpression (subclass).
            res = core.utils.str_concept(expression.concept)
        elif isinstance(expression, Sequence):
            res = "(" + " ".join(_encode(c) for c in expression.nodes) + ")"
        elif isinstance(expression, OrderedChoice):
            res = "(" + "|".join(_encode(c) for c in expression.nodes) + ")"
        elif isinstance(expression, Optional):
            res = _encode(expression.nodes[0]) + "?"
        elif isinstance(expression, ZeroOrMore):
            res = _encode(expression.nodes[0]) + "*"
        elif isinstance(expression, OneOrMore):
            res = _encode(expression.nodes[0]) + "+"
        else:
            # BUG FIX: an unknown expression type previously fell through
            # every branch and crashed with UnboundLocalError on `res`;
            # fail loudly with a meaningful error instead.
            raise TypeError(
                f"Cannot encode expression of type {type(expression).__name__}")
        if expression.rule_name:
            res += "=" + expression.rule_name
        return res

    return {core.utils.str_concept(k): _encode(v) for k, v in grammar.items()}
@staticmethod
def get_bests(results):
"""
+4 -2
View File
@@ -1,9 +1,10 @@
from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import Tokenizer, LexerError, TokenKind
from parsers.BaseParser import BaseParser, Node, ErrorNode
from dataclasses import dataclass, field
from dataclasses import dataclass
import ast
import logging
import core.utils
from parsers.ConceptLexerParser import ConceptNode
@@ -71,7 +72,7 @@ class PythonParser(BaseParser):
tree = None
python_switcher = {
TokenKind.CONCEPT: lambda t: f"__C__USE_CONCEPT__{t.value}__C__"
TokenKind.CONCEPT: lambda t: core.utils.encode_concept(t.value, True)
}
try:
@@ -136,6 +137,7 @@ class PythonGetNamesVisitor(ast.NodeVisitor):
def visit_Name(self, node):
self.names.add(node.id)
class LexerNodeParserHelperForPython:
"""Helper class to parse mix of concepts and Python"""