Fixed some bugs

This commit is contained in:
2020-08-27 18:54:28 +02:00
parent 351c16f946
commit 37cd3ed757
27 changed files with 685 additions and 189 deletions
+232 -72
View File
@@ -10,14 +10,13 @@ from collections import defaultdict
from dataclasses import dataclass
from operator import attrgetter
import core.utils
import core.builtin_helpers
from cache.Cache import Cache
from core import builtin_helpers
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, DEFINITION_TYPE_BNF, DoNotResolve, ConceptParts
from core.concept import DEFINITION_TYPE_BNF, DoNotResolve, ConceptParts, Concept
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Tokenizer, Token, TokenKind
from parsers.BaseNodeParser import BaseNodeParser, LexerNode, UnrecognizedTokensNode, ConceptNode, GrammarErrorNode
from core.tokenizer import Tokenizer, TokenKind, Token
from parsers.BaseNodeParser import BaseNodeParser, GrammarErrorNode, UnrecognizedTokensNode, ConceptNode, LexerNode
from parsers.BaseParser import BaseParser
PARSERS = ["AtomNode", "SyaNode", "Python"]
@@ -147,8 +146,16 @@ class MultiNode:
class ParsingExpression:
log_sink = []
@classmethod
def reset_logs(cls):
    """Empty, in place, the log sink held by the class."""
    del cls.log_sink[:]
def __init__(self, *args, **kwargs):
self.elements = args
self.debug_enabled = False
self._has_unordered_choice = None
nodes = kwargs.get('nodes', []) or []
if not hasattr(nodes, '__iter__'):
@@ -178,11 +185,95 @@ class ParsingExpression:
def parse(self, parser):
# TODO : add memoization
return self._parse(parser)
if self.debug_enabled:
self.debug(f">> {parser.pos:3d} : {self}")
res = self._parse(parser)
return res
def add_rule_name_if_needed(self, text):
    """
    Suffix text with "=<rule_name>" when this expression has a rule name.

    :param text: textual representation to decorate
    :return: text unchanged when rule_name is empty or None, otherwise text + "=" + rule_name
    """
    rule_name = self.rule_name
    if rule_name:
        return text + "=" + rule_name
    return text
def has_unordered_choice(self):
    """
    Return the value computed by a HasUnorderedChoiceVisitor run on this expression.

    The visitor is only run on the first call; the result is cached in
    _has_unordered_choice and returned as is afterwards.
    """
    cached = self._has_unordered_choice
    if cached is not None:
        return cached

    visitor = HasUnorderedChoiceVisitor()
    visitor.visit(self)
    self._has_unordered_choice = visitor.value
    return self._has_unordered_choice
def debug(self, msg):
    """
    Append msg to the log sink, tagged with the id of this expression.

    :param msg: text to record
    """
    entry = (id(self), msg)
    self.log_sink.append(entry)
def get_debug(self):
    """
    Build the debug text recorded for this expression.

    :return: None when debug is not enabled, "" when the log sink holds no line for this
             expression, otherwise the text produced by inner_get_debug (the log sink is
             cleared once the text is built)
    """
    if not self.debug_enabled:
        return None

    # look for the first debug line emitted by the current pexpression
    own_id = id(self)
    first = next((pos for pos, entry in enumerate(self.log_sink) if entry[0] == own_id), None)
    if first is None:
        return ""

    _, text = self.inner_get_debug(first, "")
    self.log_sink.clear()
    return text
def inner_get_debug(self, n, tab=""):
    """
    Collect the log lines of this expression and, recursively, of its child nodes.

    Lines are consumed sequentially from log_sink, starting at index n: first the
    consecutive lines emitted by this expression, then, for each node of self.nodes,
    whatever that node collects (one indentation level deeper), each time followed by
    the next consecutive lines emitted by this expression.

    :param n: line number (index in log_sink of the first line to consider)
    :param tab: current indentation
    :return: tuple (index of the first line that was not consumed, collected text)
    """
    # Note: there used to be a 'if not self.debug: return None' guard here. 'self.debug' is
    # a method, so the guard could never fire, and a bare None would have broken the
    # callers that unpack the (index, text) tuple. It is dropped on purpose.
    id_self = id(self)
    sink = self.log_sink
    chunks = []

    def take_own_lines(pos):
        # consume the consecutive lines emitted by the current pexpression
        while pos < len(sink) and sink[pos][0] == id_self:
            chunks.append(tab + sink[pos][1] + "\n")
            pos += 1
        return pos

    n = take_own_lines(n)

    for node in self.nodes:
        n, node_debug = node.inner_get_debug(n, tab + " ")
        if node_debug:
            chunks.append(node_debug)
        # the current pexpression may have logged again once the child is done
        n = take_own_lines(n)

    return n, "".join(chunks)
class ConceptExpression(ParsingExpression):
"""
@@ -234,6 +325,10 @@ class ConceptExpression(ParsingExpression):
parser_helper.parser.parser_input.tokens[node.start: node.end + 1],
[node])
@staticmethod
def get_recurse_id(parent_id, concept_id, rule_name):
    """
    Build the identifier "<parent_id>#<concept_id>(<rule_name>)".

    :param parent_id: id of the parent
    :param concept_id: id of the concept
    :param rule_name: rule name (rendered as is, even when None)
    :return: the identifier, as a string
    """
    return "{}#{}({})".format(parent_id, concept_id, rule_name)
class Sequence(ParsingExpression):
"""
@@ -277,6 +372,8 @@ class Sequence(ParsingExpression):
parsing_contexts.extend(to_append)
if len(parsing_contexts) == 0:
if self.debug_enabled:
self.debug(f"<< Failed matching {e}")
return None
to_append.clear()
@@ -290,8 +387,12 @@ class Sequence(ParsingExpression):
pcontext.fix_tokens(parser_helper)
if len(parsing_contexts) == 1:
if self.debug_enabled:
self.debug(f"<< Found match '{parsing_contexts[0].node.source}'")
return parsing_contexts[0].node
if self.debug_enabled:
self.debug(f"<< Found matches {[r.node.source for r in parsing_contexts]}")
return MultiNode(parsing_contexts)
def __repr__(self):
@@ -537,8 +638,7 @@ class Match(ParsingExpression):
super(Match, self).__init__(rule_name=rule_name, root=root)
def parse(self, parser):
result = self._parse(parser)
return result
return self._parse(parser)
class StrMatch(Match):
@@ -573,14 +673,19 @@ class StrMatch(Match):
def _parse(self, parser_helper):
    """
    Try to match the current token of parser_helper against to_match.

    With ignore_case, the lowercased str_value of the token is compared with the
    lowercased to_match; otherwise strip_quote of the token is compared with to_match.

    :param parser_helper: provides get_token(), pos and next_token()
    :return: a TerminalNode located at the current position when the token matches
             (the helper is then moved to the next token), None otherwise
    """
    token = parser_helper.get_token()

    if self.ignore_case:
        matched = token.str_value.lower() == self.to_match.lower()
    else:
        matched = token.strip_quote == self.to_match

    if not matched:
        if self.debug_enabled:
            self.debug(f"pos={parser_helper.pos}, token={token.str_value}, to_match={self.to_match} => No Match")
        return None

    if self.debug_enabled:
        self.debug(f"pos={parser_helper.pos}, token={token.str_value}, to_match={self.to_match} => Matched")

    position = parser_helper.pos
    node = TerminalNode(self, position, position, token.str_value)
    parser_helper.next_token(self.skip_white_space)
    return node
@@ -646,7 +751,6 @@ class StrMatch(Match):
# parser.dprint("-- NoMatch at {}".format(c_pos))
# parser._nm_raise(self, c_pos, parser)
class ParsingExpressionVisitor:
"""
visit ParsingExpression
@@ -654,9 +758,22 @@ class ParsingExpressionVisitor:
STOP = "##_Stop_##"
def __init__(self, get_nodes=None, circular_ref_strategy=None):
    """
    :param get_nodes: callable giving the children of a parsing expression
                      (when not provided, the 'elements' of the expression are used)
    :param circular_ref_strategy: when set, a 'seen' set is created to keep track of
                                  the expressions already visited
    """

    def elements_of(pe):
        return pe.elements

    self.get_nodes = get_nodes if get_nodes else elements_of
    self.circular_ref_strategy = circular_ref_strategy
    self.seen = set() if circular_ref_strategy else None
def visit(self, parsing_expression):
    """
    Dispatch parsing_expression to 'visit_<ClassName>' when this visitor defines it,
    to generic_visit otherwise.

    When a circular_ref_strategy is set, an expression that was already visited is
    either ignored (strategy "skip", None is returned) or reported with a RecursionError.

    :param parsing_expression: expression to visit
    :return: what the selected visit method returns
    """
    if self.circular_ref_strategy:
        key = id(parsing_expression)
        if key in self.seen:
            if self.circular_ref_strategy == "skip":
                return None
            raise RecursionError(f"circular ref detected : {self}")
        self.seen.add(key)

    handler_name = "visit_" + parsing_expression.__class__.__name__
    handler = getattr(self, handler_name, self.generic_visit)
    return handler(parsing_expression)
@@ -665,7 +782,7 @@ class ParsingExpressionVisitor:
if hasattr(self, "visit_all"):
self.visit_all(parsing_expression)
for node in parsing_expression.elements:
for node in self.get_nodes(parsing_expression):
if isinstance(node, Concept):
res = self.visit(ConceptExpression(node.key or node.name))
elif isinstance(node, str):
@@ -679,6 +796,7 @@ class ParsingExpressionVisitor:
class BnfNodeFirstTokenVisitor(ParsingExpressionVisitor):
def __init__(self, sheerka):
super().__init__()
self.sheerka = sheerka
self.first_tokens = None
@@ -713,12 +831,29 @@ class BnfNodeFirstTokenVisitor(ParsingExpressionVisitor):
class BnfNodeConceptExpressionVisitor(ParsingExpressionVisitor):
    """
    Visitor that collects the concept of every ConceptExpression it visits.
    """

    def __init__(self):
        super().__init__()
        # concepts found so far, in visit order
        self.references = list()

    def visit_ConceptExpression(self, pe):
        """Record the concept carried by the visited ConceptExpression."""
        concept = pe.concept
        self.references.append(concept)
class HasUnorderedChoiceVisitor(ParsingExpressionVisitor):
    """
    Visitor that records, in 'value', whether an UnOrderedChoice was visited.

    The children of an expression are read from its 'nodes' and the visitor is
    configured with circular_ref_strategy="skip".
    """

    def __init__(self):
        super().__init__(lambda pe: pe.nodes, circular_ref_strategy="skip")
        self.value = False

    def __repr__(self):
        return f"HasUnorderedChoiceVisitor(={self.value})"

    def reset(self):
        """Put the visitor back in its initial state, so that it can be reused."""
        self.value = False
        # Also forget the expressions already visited. Otherwise, with the "skip" strategy,
        # visiting the same expression again would be ignored and 'value' would stay False.
        if self.seen is not None:
            self.seen.clear()

    def visit_UnOrderedChoice(self, parsing_expression):
        """Flag that an UnOrderedChoice was found and return the STOP marker."""
        self.value = True
        return ParsingExpressionVisitor.STOP
class BnfConceptParserHelper:
def __init__(self, parser):
self.parser = parser
@@ -806,7 +941,6 @@ class BnfConceptParserHelper:
if isinstance(node, MultiNode):
# when multiple choices are found, use the longest result
node = node.results[0].node
if node is not None and node.end != -1:
self.sequence.append(self.create_concept_node(concept, node))
self.pos = node.end
@@ -835,7 +969,7 @@ class BnfConceptParserHelper:
self.unrecognized_tokens.fix_source()
# try to recognize concepts
nodes_sequences = builtin_helpers.get_lexer_nodes_from_unrecognized(
nodes_sequences = core.builtin_helpers.get_lexer_nodes_from_unrecognized(
self.parser.context,
self.unrecognized_tokens,
PARSERS)
@@ -867,12 +1001,17 @@ class BnfConceptParserHelper:
clone.debug = self.debug[:]
self.errors = self.errors[:]
clone.sequence = self.sequence[:]
clone.pos = self.pos
clone.unrecognized_tokens = self.unrecognized_tokens.clone()
clone.has_unrecognized = self.has_unrecognized
clone.bnf_parsed = self.bnf_parsed
clone.pos = self.pos
return clone
def finalize(self):
if self.bnf_parsed > 0:
if self.bnf_parsed:
self.manage_unrecognized()
for forked in self.forked:
# manage that some clones may have been forked
@@ -883,13 +1022,12 @@ class BnfConceptParserHelper:
key = (template.key, template.id) if template.id else template.key
concept = sheerka.new(key)
concept = self.finalize_concept(sheerka, concept, underlying)
concept_node = ConceptNode(
concept,
underlying.start,
underlying.end,
self.parser.parser_input.tokens[underlying.start: underlying.end + 1],
None,
underlying)
concept_node = ConceptNode(concept,
underlying.start,
underlying.end,
self.parser.parser_input.tokens[underlying.start: underlying.end + 1],
None,
underlying)
return concept_node
def finalize_concept(self, sheerka, concept, underlying, init_empty_body=True):
@@ -1015,6 +1153,7 @@ class BnfNodeParser(BaseNodeParser):
if 'sheerka' in kwargs:
sheerka = kwargs.get("sheerka")
self.concepts_grammars = sheerka.concepts_grammars
self.sheerka = sheerka
else:
self.concepts_grammars = Cache()
@@ -1031,6 +1170,7 @@ class BnfNodeParser(BaseNodeParser):
@staticmethod
def get_valid(parsers_helpers):
valid_parser_helpers = []
for parser_helper in parsers_helpers:
if not parser_helper.bnf_parsed or parser_helper.has_error():
@@ -1146,7 +1286,7 @@ class BnfNodeParser(BaseNodeParser):
def fix_infinite_recursions(self, context, grammar, concept_id, parsing_expression):
"""
Check the newly created parsing expresion
Check the newly created parsing expression
Some infinite recursion can be resolved, simply by removing the pexpression that causes the loop
Let's look for that
:param context:
@@ -1162,7 +1302,7 @@ class BnfNodeParser(BaseNodeParser):
for node_id in path_:
expression_ = expression_.nodes[0] if isinstance(expression_, ConceptExpression) else expression_
for i, node in [(i, n) for i, n in enumerate(expression_.nodes) if isinstance(n, ConceptExpression)]:
if node.recurse_id == node_id or node.concept.id == node_id:
if node_id in (node.recurse_id, node.concept.id):
index_ = i
parent_ = expression_
expression_ = node # take the child of the ConceptExpression found
@@ -1220,17 +1360,22 @@ class BnfNodeParser(BaseNodeParser):
in_recursion.extend(already_found)
return True
already_found.append(id_to_use)
return self.check_for_infinite_recursion(
parsing_expression.nodes[0], already_found, in_recursion, only_first)
return self.check_for_infinite_recursion(parsing_expression.nodes[0],
already_found,
in_recursion,
only_first)
already_found_for_current_node = []
if isinstance(parsing_expression, Sequence):
# for sequence, we need to check all nodes
# for sequence, we need to check all nodes (unless, only first)
if only_first:
nodes = [] if len(parsing_expression.nodes) == 0 else [parsing_expression.nodes[0]]
else:
nodes = parsing_expression.nodes
for node in nodes:
already_found_for_current_node = already_found.copy()
already_found_for_current_node.clear()
already_found_for_current_node.extend(already_found)
if self.check_for_infinite_recursion(node, already_found_for_current_node, in_recursion, False):
return True
return False
@@ -1239,7 +1384,8 @@ class BnfNodeParser(BaseNodeParser):
# for ordered choice, if there is at least one node that does not resolved to a recursion
# we are safe
for node in parsing_expression.nodes:
already_found_for_current_node = already_found.copy()
already_found_for_current_node.clear()
already_found_for_current_node.extend(already_found)
if self.check_for_infinite_recursion(node, already_found_for_current_node, in_recursion, True):
return True
else:
@@ -1248,7 +1394,8 @@ class BnfNodeParser(BaseNodeParser):
if isinstance(parsing_expression, UnOrderedChoice):
for node in parsing_expression.nodes:
already_found_for_current_node = already_found.copy()
already_found_for_current_node.clear()
already_found_for_current_node.extend(already_found.copy())
if self.check_for_infinite_recursion(node, already_found_for_current_node, in_recursion, True):
return True
return False
@@ -1278,7 +1425,8 @@ class BnfNodeParser(BaseNodeParser):
root_concept=concept,
desc=desc) as sub_context:
# get the parsing expression
ret = self.resolve_concept_parsing_expression(sub_context, concept, None, grammar, to_update)
to_skip = {concept.id}
ret = self.resolve_concept_parsing_expression(sub_context, concept, None, grammar, to_skip, to_update)
# check and update parsing expression that are still under construction
# Note that we only update the concept that will update concepts_grammars
@@ -1289,15 +1437,10 @@ class BnfNodeParser(BaseNodeParser):
if isinstance(node, UnderConstruction):
pe.nodes[i] = grammar.get(node.concept_id)
# # check for infinite recursions.
# # and try to fix them when possible
# already_found = [concept.id]
# concepts_in_recursion = []
# if self.check_for_infinite_recursion(ret, already_found, concepts_in_recursion):
# chicken_anf_egg = context.sheerka.new(BuiltinConcepts.CHICKEN_AND_EGG, body=concepts_in_recursion)
# for concept_id in concepts_in_recursion:
# grammar[concept_id] = chicken_anf_egg
# KSI 20200826
# To be rewritten into get_infinite_recursions
# I have changed resolve_concept_parsing_expression() to directly avoid obvious circular references,
# so there is no longer any need to search for them and fix them
concepts_in_recursion = self.fix_infinite_recursions(context, grammar, concept.id, ret)
if concepts_in_recursion:
chicken_anf_egg = context.sheerka.new(BuiltinConcepts.CHICKEN_AND_EGG, body=concepts_in_recursion)
@@ -1307,54 +1450,67 @@ class BnfNodeParser(BaseNodeParser):
# update, in case of infinite circular recursion
ret = grammar[concept.id]
# finally, update concept grammar
# finally, update the list of the known pexpression (self.concepts_grammars)
# We do not add pexpressions that contain UnOrderedChoice because the choices always depend on the current
# concept.
# For example, the pexpression for 'twenties' found under the concept 'hundreds' won't be the same as
# the pexpression 'twenties' under the concept 'thousand' or even the pexpression 'twenties' without any
# context.
for k, v in grammar.items():
self.concepts_grammars.put(k, v)
# not quite sure that it is a good idea.
# Why do we want to corrupt previous valid entries ?
if context.sheerka.isinstance(v, BuiltinConcepts.CHICKEN_AND_EGG):
if k == concept.id:
self.concepts_grammars.put(k, v)
elif context.sheerka.isinstance(v, BuiltinConcepts.CHICKEN_AND_EGG):
# not quite sure that it is a good idea.
# Why do we want to corrupt previous valid entries ?
self.concepts_grammars.put(k, v)
else:
if not v.has_unordered_choice():
self.concepts_grammars.put(k, v)
sub_context.add_values(return_values=ret)
return ret
def resolve_concept_parsing_expression(self, context, concept, name, grammar, to_update):
def resolve_concept_parsing_expression(self, context, concept, name, grammar, to_skip, to_update):
"""
:param context:
:param concept: concept
:param name: rule_name of the concept if exists
:param grammar: already resolved parsing expressions
:param to_update: parsing expressions that contains unresovled parsing expression
:param to_skip: list of concepts to skip in order to avoid circular references (only for UnOrderedChoice pe)
:param to_update: parsing expressions that contains unresolved parsing expression
:return:
"""
if context.sheerka.isaset(context, concept) and hasattr(context, "obj"):
key_to_use = f"{concept.id}#{name}#{context.obj.id}"
sheerka = context.sheerka
if sheerka.isaset(context, concept) and hasattr(context, "obj"):
key_to_use = ConceptExpression.get_recurse_id(context.obj.id, concept.id, name)
else:
key_to_use = concept.id
if key_to_use in self.concepts_grammars: # validated entry
return self.concepts_grammars.get(key_to_use)
if key_to_use in self.concepts_grammars:
# Use the global pexpression only if it does not contain UnOrderedChoice
pe = self.concepts_grammars.get(key_to_use)
if not pe.has_unordered_choice():
return self.concepts_grammars.get(key_to_use)
if key_to_use in grammar: # under construction entry
return grammar.get(key_to_use)
desc = f"Resolve concept parsing expression for '{concept}'. {key_to_use=}"
with context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as sub_context:
if not concept.bnf: # to save a function call. Not sure it worth it.
if not concept.bnf: # 'if' is done outside to save a function call. Not sure it worth it.
BaseNodeParser.ensure_bnf(sub_context, concept, self.name)
grammar[key_to_use] = UnderConstruction(concept.id)
sheerka = context.sheerka
if concept.metadata.definition_type == DEFINITION_TYPE_BNF:
expression = concept.bnf
desc = f"Bnf concept detected. Resolving parsing expression '{expression}'"
with sub_context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as ssc:
ssc.add_inputs(expression=expression)
resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_update)
resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_skip, to_update)
ssc.add_values(return_values=resolved)
elif sheerka.isaset(context, concept):
@@ -1363,15 +1519,15 @@ class BnfNodeParser(BaseNodeParser):
ssc.add_inputs(concept=concept)
concepts_in_group = self.sheerka.get_set_elements(ssc, concept)
valid_concepts = []
for c in concepts_in_group:
if c.id == context.obj.id:
continue
if hasattr(context, "concepts_to_skip") and c.id in context.concepts_to_skip:
continue
valid_concepts.append(c)
valid_concepts = [c for c in concepts_in_group if c.id not in to_skip]
# for c in concepts_in_group:
# if c.id == context.obj.id:
# continue
#
# if hasattr(context, "concepts_to_skip") and c.id in context.concepts_to_skip:
# continue
#
# valid_concepts.append(c)
nodes = []
for c in valid_concepts:
@@ -1381,6 +1537,7 @@ class BnfNodeParser(BaseNodeParser):
resolved = self.resolve_parsing_expression(ssc,
UnOrderedChoice(*nodes),
grammar,
to_skip,
to_update)
ssc.add_values(concepts_in_group=concepts_in_group)
ssc.add_values(return_values=resolved)
@@ -1389,7 +1546,7 @@ class BnfNodeParser(BaseNodeParser):
desc = f"Concept is a simple concept."
with sub_context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as ssc:
expression = self.get_expression_from_concept_name(concept.name)
resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_update)
resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_skip, to_update)
grammar[key_to_use] = resolved
@@ -1400,7 +1557,7 @@ class BnfNodeParser(BaseNodeParser):
sub_context.add_values(return_values=resolved)
return resolved
def resolve_parsing_expression(self, context, expression, grammar, to_update):
def resolve_parsing_expression(self, context, expression, grammar, to_skip, to_update):
if isinstance(expression, str):
ret = StrMatch(expression, ignore_case=self.ignore_case)
@@ -1416,12 +1573,14 @@ class BnfNodeParser(BaseNodeParser):
unknown_concept = self.sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=concept)
return self.add_error(unknown_concept)
pe = self.resolve_concept_parsing_expression(
context,
concept,
expression.rule_name,
grammar,
to_update)
inner_to_skip = to_skip.copy()
inner_to_skip.add(concept.id)
pe = self.resolve_concept_parsing_expression(context,
concept,
expression.rule_name,
grammar,
inner_to_skip,
to_update)
if not isinstance(pe, (ParsingExpression, UnderConstruction)):
return pe # an error is detected, escalate it
@@ -1447,7 +1606,7 @@ class BnfNodeParser(BaseNodeParser):
ret = expression
ret.nodes = []
for e in ret.elements:
pe = self.resolve_parsing_expression(context, e, grammar, to_update)
pe = self.resolve_parsing_expression(context, e, grammar, to_skip, to_update)
if not isinstance(pe, (ParsingExpression, UnderConstruction)):
return pe # an error is detected, escalate it
if isinstance(pe, UnderConstruction):
@@ -1462,6 +1621,7 @@ class BnfNodeParser(BaseNodeParser):
expression.sep = self.resolve_parsing_expression(context,
expression.sep,
grammar,
to_skip,
to_update)
return ret