Fixed some bugs

This commit is contained in:
2020-08-27 18:54:28 +02:00
parent 351c16f946
commit 37cd3ed757
27 changed files with 685 additions and 189 deletions
+70 -17
View File
@@ -272,6 +272,9 @@ class SourceCodeWithConceptNode(LexerNode):
if id(self) == id(other):
return True
if isinstance(other, SCWC):
return other == self
if not isinstance(other, SourceCodeWithConceptNode):
return False
@@ -315,6 +318,10 @@ class SourceCodeWithConceptNode(LexerNode):
return self
def pseudo_fix_source(self):
"""
pseudo because the code is not that clean !
:return:
"""
self.source = self.first.source
for n in self.nodes:
self.source += " "
@@ -352,23 +359,6 @@ utnode = namedtuple("utnode", "start end source")
scnode = namedtuple("scnode", "start end source")
@dataclass(init=False)
class SCWC:
"""
SourceNodeWithConcept tester class
It matches with a SourceNodeWithConcept
but it's easier to instantiate during the tests
"""
first: LexerNode
last: LexerNode
content: tuple
def __init__(self, first, last, *args):
self.first = first
self.last = last
self.content = args
class HelperWithPos:
def __init__(self, start=None, end=None):
self.start = start
@@ -439,6 +429,69 @@ class SCN(HelperWithPos):
return txt + ")"
class SCWC(HelperWithPos):
    """
    SourceCodeWithConceptNode tester class.

    It compares equal to a SourceCodeWithConceptNode that has the same
    first node, last node and inner nodes,
    but it's easier to instantiate during the tests.
    """

    def __init__(self, first, last, *args):
        """
        :param first: expected first node
        :param last: expected last node
        :param args: expected inner nodes, in order
        """
        super().__init__(None, None)
        self.first = first
        self.last = last
        self.content = args

    def __eq__(self, other):
        """
        :param other: object to compare with
        :return: True for the very same object, or for a SourceCodeWithConceptNode
                 whose first, last and nodes match; False otherwise
        """
        if self is other:
            return True

        # Only a SourceCodeWithConceptNode can match this helper.
        # Always answer with an explicit boolean instead of falling off the end.
        if not isinstance(other, SourceCodeWithConceptNode):
            return False

        if self.first != other.first:
            return False

        if self.last != other.last:
            return False

        if len(self.content) != len(other.nodes):
            return False

        # at last, the inner nodes must match pairwise
        return not any(self_node != other_node
                       for self_node, other_node in zip(self.content, other.nodes))

    def __repr__(self):
        txt = "SCWC("
        if self.start is not None:
            txt += f"start={self.start}"
        if self.end is not None:
            txt += f", end={self.end}"
        txt += f", source='{self.source}'"
        return txt + ")"

    @property
    def source(self):
        """
        Rebuild the source from the first node, the inner nodes and the last node.

        this code is a copy and paste from SourceCodeWithConceptNode.pseudo_fix_source
        TODO: create a common function or whatever...
        :return: the rebuilt source
        """
        source = self.first.source
        for n in self.content:
            # every inner node is preceded by a separator
            source += " "
            if hasattr(n, "source"):
                source += n.source
            elif hasattr(n, "concept"):
                source += str(n.concept)
            else:
                source += " unknown"
        source += self.last.source
        return source
class CN(HelperWithPos):
"""
ConceptNode tester class
+5 -29
View File
@@ -175,38 +175,14 @@ class BaseParser:
body=tree,
try_parsed=try_parse)
def get_input_as_text(self, parser_input, custom_switcher=None, tracker=None):
@staticmethod
def get_input_as_lexer_nodes(parser_input, expected_parser=None):
"""
Recreate back the source code from parser_input
:param parser_input: list of Tokens
:param custom_switcher: map of [TokenKind, overridden values]
:param tracker: keep track of the value overridden by custom_switcher
Extract the lexer node from the parser_input
:param parser_input:
:param expected_parser: returns the nodes if the parent parser is the expected one
:return:
"""
if isinstance(parser_input, list):
return self.get_text_from_tokens(parser_input, custom_switcher, tracker)
if isinstance(parser_input, ParserResultConcept):
parser_input = parser_input.source
if "c:" in parser_input:
return self.get_text_from_tokens(list(Tokenizer(parser_input)), custom_switcher, tracker)
return parser_input
def get_input_as_tokens(self, parser_input, strip_eof=False):
if isinstance(parser_input, list):
return self.manage_eof(parser_input, strip_eof)
if isinstance(parser_input, ParserResultConcept):
if parser_input.tokens:
return self.manage_eof(parser_input.tokens, strip_eof)
else:
return Tokenizer(parser_input.source)
return Tokenizer(parser_input, yield_eof=not strip_eof)
def get_input_as_lexer_nodes(self, parser_input, expected_parser=None):
if not isinstance(parser_input, ParserResultConcept):
return None
+232 -72
View File
@@ -10,14 +10,13 @@ from collections import defaultdict
from dataclasses import dataclass
from operator import attrgetter
import core.utils
import core.builtin_helpers
from cache.Cache import Cache
from core import builtin_helpers
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, DEFINITION_TYPE_BNF, DoNotResolve, ConceptParts
from core.concept import DEFINITION_TYPE_BNF, DoNotResolve, ConceptParts, Concept
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Tokenizer, Token, TokenKind
from parsers.BaseNodeParser import BaseNodeParser, LexerNode, UnrecognizedTokensNode, ConceptNode, GrammarErrorNode
from core.tokenizer import Tokenizer, TokenKind, Token
from parsers.BaseNodeParser import BaseNodeParser, GrammarErrorNode, UnrecognizedTokensNode, ConceptNode, LexerNode
from parsers.BaseParser import BaseParser
PARSERS = ["AtomNode", "SyaNode", "Python"]
@@ -147,8 +146,16 @@ class MultiNode:
class ParsingExpression:
log_sink = []
@classmethod
def reset_logs(cls):
    """
    Empty the shared log sink, in place
    :return:
    """
    del cls.log_sink[:]
def __init__(self, *args, **kwargs):
self.elements = args
self.debug_enabled = False
self._has_unordered_choice = None
nodes = kwargs.get('nodes', []) or []
if not hasattr(nodes, '__iter__'):
@@ -178,11 +185,95 @@ class ParsingExpression:
def parse(self, parser):
# TODO : add memoization
return self._parse(parser)
if self.debug_enabled:
self.debug(f">> {parser.pos:3d} : {self}")
res = self._parse(parser)
return res
def add_rule_name_if_needed(self, text):
return text + "=" + self.rule_name if self.rule_name else text
def has_unordered_choice(self):
    """
    Tell whether an UnOrderedChoice was found by HasUnorderedChoiceVisitor for this expression.
    The answer is computed on the first call only, then served from self._has_unordered_choice
    :return: the value found by the visitor
    """
    cached = self._has_unordered_choice
    if cached is not None:
        return cached

    finder = HasUnorderedChoiceVisitor()
    finder.visit(self)
    self._has_unordered_choice = finder.value
    return self._has_unordered_choice
def debug(self, msg):
    """
    Record a debug message in the log sink, tagged with the id of the current expression
    :param msg: message to record
    :return:
    """
    entry = (id(self), msg)
    self.log_sink.append(entry)
def get_debug(self):
    """
    Return the debug text collected for the current pexpression, then empty the log sink
    :return: None when debug is not enabled, "" when there is no line for this pexpression,
             otherwise the text built by inner_get_debug
    """
    if not self.debug_enabled:
        return None

    # position of the first debug line that belongs to the current pexpression
    own_id = id(self)
    first_line = next((pos for pos, entry in enumerate(self.log_sink) if entry[0] == own_id), None)
    if first_line is None:
        return ""

    _, text = self.inner_get_debug(first_line, "")
    self.log_sink.clear()
    return text
def inner_get_debug(self, n, tab=""):
    """
    Collect the debug lines of the current pexpression and of its nodes, starting at line n.
    The lines of the nodes are collected with one more level of indentation.

    :param n: line number (index in log_sink of the first line to look at)
    :param tab: current indentation
    :return: tuple (first line number not consumed, collected text).
             The text is None when debug is not enabled for the current pexpression
    """
    # 'self.debug' is the logging method (always truthy): the flag is 'debug_enabled'.
    # The result must remain a 2-tuple because the callers unpack it.
    if not self.debug_enabled:
        return n, None

    id_self = id(self)
    log = self.log_sink

    def add_debug_for_current(_n, _debug):
        # consume the consecutive lines, from _n, that belong to the current pexpression
        while _n < len(log) and log[_n][0] == id_self:
            _debug += tab + log[_n][1] + "\n"
            _n += 1
        return _n, _debug

    debug = ""
    n, debug = add_debug_for_current(n, debug)

    for node in self.nodes:
        n, node_debug = node.inner_get_debug(n, tab + " ")
        if node_debug:
            debug += node_debug

        # the current pexpression may have logged again once the node is done
        n, debug = add_debug_for_current(n, debug)

    return n, debug
class ConceptExpression(ParsingExpression):
"""
@@ -234,6 +325,10 @@ class ConceptExpression(ParsingExpression):
parser_helper.parser.parser_input.tokens[node.start: node.end + 1],
[node])
@staticmethod
def get_recurse_id(parent_id, concept_id, rule_name):
    """
    Build the recurse id of a concept expression
    :param parent_id: id put before the '#'
    :param concept_id: id put after the '#'
    :param rule_name: rule name, put between the parenthesis
    :return: the recurse id, as a string
    """
    recurse_id = f"{parent_id}#{concept_id}({rule_name})"
    return recurse_id
class Sequence(ParsingExpression):
"""
@@ -277,6 +372,8 @@ class Sequence(ParsingExpression):
parsing_contexts.extend(to_append)
if len(parsing_contexts) == 0:
if self.debug_enabled:
self.debug(f"<< Failed matching {e}")
return None
to_append.clear()
@@ -290,8 +387,12 @@ class Sequence(ParsingExpression):
pcontext.fix_tokens(parser_helper)
if len(parsing_contexts) == 1:
if self.debug_enabled:
self.debug(f"<< Found match '{parsing_contexts[0].node.source}'")
return parsing_contexts[0].node
if self.debug_enabled:
self.debug(f"<< Found matches {[r.node.source for r in parsing_contexts]}")
return MultiNode(parsing_contexts)
def __repr__(self):
@@ -537,8 +638,7 @@ class Match(ParsingExpression):
super(Match, self).__init__(rule_name=rule_name, root=root)
def parse(self, parser):
result = self._parse(parser)
return result
return self._parse(parser)
class StrMatch(Match):
@@ -573,14 +673,19 @@ class StrMatch(Match):
def _parse(self, parser_helper):
    """
    Try to match the current token of the parser helper with self.to_match
    :param parser_helper: provides the current token and the current position
    :return: a TerminalNode when the token matches (the helper is then moved to the next token),
             None otherwise
    """
    token = parser_helper.get_token()

    if self.ignore_case:
        matched = token.str_value.lower() == self.to_match.lower()
    else:
        matched = token.strip_quote == self.to_match

    if not matched:
        if self.debug_enabled:
            self.debug(f"pos={parser_helper.pos}, token={token.str_value}, to_match={self.to_match} => No Match")
        return None

    if self.debug_enabled:
        self.debug(f"pos={parser_helper.pos}, token={token.str_value}, to_match={self.to_match} => Matched")

    # the node must be created before moving to the next token, as it uses the current position
    node = TerminalNode(self, parser_helper.pos, parser_helper.pos, token.str_value)
    parser_helper.next_token(self.skip_white_space)
    return node
@@ -646,7 +751,6 @@ class StrMatch(Match):
# parser.dprint("-- NoMatch at {}".format(c_pos))
# parser._nm_raise(self, c_pos, parser)
class ParsingExpressionVisitor:
"""
visit ParsingExpression
@@ -654,9 +758,22 @@ class ParsingExpressionVisitor:
STOP = "##_Stop_##"
def __init__(self, get_nodes=None, circular_ref_strategy=None):
    """
    :param get_nodes: function that returns the children of a parsing expression.
                      Defaults to the 'elements' of the parsing expression
    :param circular_ref_strategy: what to do when a parsing expression is visited twice.
                      Falsy: no check is done
                      "skip": the parsing expression is silently ignored
                      any other value: a RecursionError is raised
    """
    self.get_nodes = get_nodes or (lambda pe: pe.elements)
    self.circular_ref_strategy = circular_ref_strategy
    # ids of the parsing expressions already visited (only when a strategy is given)
    self.seen = set() if circular_ref_strategy else None

def visit(self, parsing_expression):
    """
    Dispatch to the method 'visit_<class name>' if it exists, to generic_visit otherwise
    :param parsing_expression: parsing expression to visit
    :return: what the selected method returns,
             or None when the parsing expression is skipped (already seen, "skip" strategy)
    :raises RecursionError: parsing expression already seen and the strategy is not "skip"
    """
    if self.circular_ref_strategy:
        pe_id = id(parsing_expression)
        if pe_id in self.seen:
            if self.circular_ref_strategy == "skip":
                return None
            # report the parsing expression that closes the loop, not the visitor
            raise RecursionError(f"circular ref detected : {parsing_expression}")
        self.seen.add(pe_id)

    method = 'visit_' + parsing_expression.__class__.__name__
    visitor = getattr(self, method, self.generic_visit)
    return visitor(parsing_expression)
@@ -665,7 +782,7 @@ class ParsingExpressionVisitor:
if hasattr(self, "visit_all"):
self.visit_all(parsing_expression)
for node in parsing_expression.elements:
for node in self.get_nodes(parsing_expression):
if isinstance(node, Concept):
res = self.visit(ConceptExpression(node.key or node.name))
elif isinstance(node, str):
@@ -679,6 +796,7 @@ class ParsingExpressionVisitor:
class BnfNodeFirstTokenVisitor(ParsingExpressionVisitor):
def __init__(self, sheerka):
super().__init__()
self.sheerka = sheerka
self.first_tokens = None
@@ -713,12 +831,29 @@ class BnfNodeFirstTokenVisitor(ParsingExpressionVisitor):
class BnfNodeConceptExpressionVisitor(ParsingExpressionVisitor):
    """
    Collect, in self.references, the concept of every ConceptExpression that is visited
    """

    def __init__(self):
        super().__init__()
        self.references = list()

    def visit_ConceptExpression(self, pe):
        referenced = pe.concept
        self.references.append(referenced)
class HasUnorderedChoiceVisitor(ParsingExpressionVisitor):
    """
    Visitor that sets self.value to True when an UnOrderedChoice is visited.
    It walks the 'nodes' of the parsing expressions and skips the ones that were already visited
    """

    def __init__(self):
        super().__init__(lambda pe: pe.nodes, circular_ref_strategy="skip")
        self.value = False

    def __repr__(self):
        return f"HasUnorderedChoiceVisitor(={self.value})"

    def reset(self):
        """
        Put the visitor back to its initial state, so it can be used again
        :return:
        """
        self.value = False
        # also forget the parsing expressions already visited,
        # otherwise the next visit would skip them and leave value to False
        seen = getattr(self, "seen", None)
        if seen:
            seen.clear()

    def visit_UnOrderedChoice(self, parsing_expression):
        self.value = True
        return ParsingExpressionVisitor.STOP
class BnfConceptParserHelper:
def __init__(self, parser):
self.parser = parser
@@ -806,7 +941,6 @@ class BnfConceptParserHelper:
if isinstance(node, MultiNode):
# when multiple choices are found, use the longest result
node = node.results[0].node
if node is not None and node.end != -1:
self.sequence.append(self.create_concept_node(concept, node))
self.pos = node.end
@@ -835,7 +969,7 @@ class BnfConceptParserHelper:
self.unrecognized_tokens.fix_source()
# try to recognize concepts
nodes_sequences = builtin_helpers.get_lexer_nodes_from_unrecognized(
nodes_sequences = core.builtin_helpers.get_lexer_nodes_from_unrecognized(
self.parser.context,
self.unrecognized_tokens,
PARSERS)
@@ -867,12 +1001,17 @@ class BnfConceptParserHelper:
clone.debug = self.debug[:]
self.errors = self.errors[:]
clone.sequence = self.sequence[:]
clone.pos = self.pos
clone.unrecognized_tokens = self.unrecognized_tokens.clone()
clone.has_unrecognized = self.has_unrecognized
clone.bnf_parsed = self.bnf_parsed
clone.pos = self.pos
return clone
def finalize(self):
if self.bnf_parsed > 0:
if self.bnf_parsed:
self.manage_unrecognized()
for forked in self.forked:
# manage that some clones may have been forked
@@ -883,13 +1022,12 @@ class BnfConceptParserHelper:
key = (template.key, template.id) if template.id else template.key
concept = sheerka.new(key)
concept = self.finalize_concept(sheerka, concept, underlying)
concept_node = ConceptNode(
concept,
underlying.start,
underlying.end,
self.parser.parser_input.tokens[underlying.start: underlying.end + 1],
None,
underlying)
concept_node = ConceptNode(concept,
underlying.start,
underlying.end,
self.parser.parser_input.tokens[underlying.start: underlying.end + 1],
None,
underlying)
return concept_node
def finalize_concept(self, sheerka, concept, underlying, init_empty_body=True):
@@ -1015,6 +1153,7 @@ class BnfNodeParser(BaseNodeParser):
if 'sheerka' in kwargs:
sheerka = kwargs.get("sheerka")
self.concepts_grammars = sheerka.concepts_grammars
self.sheerka = sheerka
else:
self.concepts_grammars = Cache()
@@ -1031,6 +1170,7 @@ class BnfNodeParser(BaseNodeParser):
@staticmethod
def get_valid(parsers_helpers):
valid_parser_helpers = []
for parser_helper in parsers_helpers:
if not parser_helper.bnf_parsed or parser_helper.has_error():
@@ -1146,7 +1286,7 @@ class BnfNodeParser(BaseNodeParser):
def fix_infinite_recursions(self, context, grammar, concept_id, parsing_expression):
"""
Check the newly created parsing expresion
Check the newly created parsing expression
Some infinite recursion can be resolved, simply by removing the pexpression that causes the loop
Let's look for that
:param context:
@@ -1162,7 +1302,7 @@ class BnfNodeParser(BaseNodeParser):
for node_id in path_:
expression_ = expression_.nodes[0] if isinstance(expression_, ConceptExpression) else expression_
for i, node in [(i, n) for i, n in enumerate(expression_.nodes) if isinstance(n, ConceptExpression)]:
if node.recurse_id == node_id or node.concept.id == node_id:
if node_id in (node.recurse_id, node.concept.id):
index_ = i
parent_ = expression_
expression_ = node # take the child of the ConceptExpression found
@@ -1220,17 +1360,22 @@ class BnfNodeParser(BaseNodeParser):
in_recursion.extend(already_found)
return True
already_found.append(id_to_use)
return self.check_for_infinite_recursion(
parsing_expression.nodes[0], already_found, in_recursion, only_first)
return self.check_for_infinite_recursion(parsing_expression.nodes[0],
already_found,
in_recursion,
only_first)
already_found_for_current_node = []
if isinstance(parsing_expression, Sequence):
# for sequence, we need to check all nodes
# for sequence, we need to check all nodes (unless, only first)
if only_first:
nodes = [] if len(parsing_expression.nodes) == 0 else [parsing_expression.nodes[0]]
else:
nodes = parsing_expression.nodes
for node in nodes:
already_found_for_current_node = already_found.copy()
already_found_for_current_node.clear()
already_found_for_current_node.extend(already_found)
if self.check_for_infinite_recursion(node, already_found_for_current_node, in_recursion, False):
return True
return False
@@ -1239,7 +1384,8 @@ class BnfNodeParser(BaseNodeParser):
# for ordered choice, if there is at least one node that does not resolved to a recursion
# we are safe
for node in parsing_expression.nodes:
already_found_for_current_node = already_found.copy()
already_found_for_current_node.clear()
already_found_for_current_node.extend(already_found)
if self.check_for_infinite_recursion(node, already_found_for_current_node, in_recursion, True):
return True
else:
@@ -1248,7 +1394,8 @@ class BnfNodeParser(BaseNodeParser):
if isinstance(parsing_expression, UnOrderedChoice):
for node in parsing_expression.nodes:
already_found_for_current_node = already_found.copy()
already_found_for_current_node.clear()
already_found_for_current_node.extend(already_found.copy())
if self.check_for_infinite_recursion(node, already_found_for_current_node, in_recursion, True):
return True
return False
@@ -1278,7 +1425,8 @@ class BnfNodeParser(BaseNodeParser):
root_concept=concept,
desc=desc) as sub_context:
# get the parsing expression
ret = self.resolve_concept_parsing_expression(sub_context, concept, None, grammar, to_update)
to_skip = {concept.id}
ret = self.resolve_concept_parsing_expression(sub_context, concept, None, grammar, to_skip, to_update)
# check and update parsing expression that are still under construction
# Note that we only update the concept that will update concepts_grammars
@@ -1289,15 +1437,10 @@ class BnfNodeParser(BaseNodeParser):
if isinstance(node, UnderConstruction):
pe.nodes[i] = grammar.get(node.concept_id)
# # check for infinite recursions.
# # and try to fix them when possible
# already_found = [concept.id]
# concepts_in_recursion = []
# if self.check_for_infinite_recursion(ret, already_found, concepts_in_recursion):
# chicken_anf_egg = context.sheerka.new(BuiltinConcepts.CHICKEN_AND_EGG, body=concepts_in_recursion)
# for concept_id in concepts_in_recursion:
# grammar[concept_id] = chicken_anf_egg
# KSI 20200826
# To be rewritten into get_infinite_recursions
# I have changed resolve_concept_parsing_expression() to directly avoid obvious circular references
# So it is no longer necessary to search for them and fix them
concepts_in_recursion = self.fix_infinite_recursions(context, grammar, concept.id, ret)
if concepts_in_recursion:
chicken_anf_egg = context.sheerka.new(BuiltinConcepts.CHICKEN_AND_EGG, body=concepts_in_recursion)
@@ -1307,54 +1450,67 @@ class BnfNodeParser(BaseNodeParser):
# update, in case of infinite circular recursion
ret = grammar[concept.id]
# finally, update concept grammar
# finally, update the list of the known pexpression (self.concepts_grammars)
# We do not add pexpressions that contain UnOrderedChoice because the choices always depend on the current
# concept.
# For example, the pexpression for 'twenties' found under the concept 'hundreds' won't be the same as
# the pexpression 'twenties' under the concept 'thousand' or even the pexpression 'twenties' without any
# context.
for k, v in grammar.items():
self.concepts_grammars.put(k, v)
# not quite sure that it is a good idea.
# Why do we want to corrupt previous valid entries ?
if context.sheerka.isinstance(v, BuiltinConcepts.CHICKEN_AND_EGG):
if k == concept.id:
self.concepts_grammars.put(k, v)
elif context.sheerka.isinstance(v, BuiltinConcepts.CHICKEN_AND_EGG):
# not quite sure that it is a good idea.
# Why do we want to corrupt previous valid entries ?
self.concepts_grammars.put(k, v)
else:
if not v.has_unordered_choice():
self.concepts_grammars.put(k, v)
sub_context.add_values(return_values=ret)
return ret
def resolve_concept_parsing_expression(self, context, concept, name, grammar, to_update):
def resolve_concept_parsing_expression(self, context, concept, name, grammar, to_skip, to_update):
"""
:param context:
:param concept: concept
:param name: rule_name of the concept if exists
:param grammar: already resolved parsing expressions
:param to_update: parsing expressions that contains unresovled parsing expression
:param to_skip: list of concepts to skip in order to avoid circular references (only for UnOrderedChoice pe)
:param to_update: parsing expressions that contains unresolved parsing expression
:return:
"""
if context.sheerka.isaset(context, concept) and hasattr(context, "obj"):
key_to_use = f"{concept.id}#{name}#{context.obj.id}"
sheerka = context.sheerka
if sheerka.isaset(context, concept) and hasattr(context, "obj"):
key_to_use = ConceptExpression.get_recurse_id(context.obj.id, concept.id, name)
else:
key_to_use = concept.id
if key_to_use in self.concepts_grammars: # validated entry
return self.concepts_grammars.get(key_to_use)
if key_to_use in self.concepts_grammars:
# Use the global pexpression only if it does not contains UnOrderedChoice
pe = self.concepts_grammars.get(key_to_use)
if not pe.has_unordered_choice():
return self.concepts_grammars.get(key_to_use)
if key_to_use in grammar: # under construction entry
return grammar.get(key_to_use)
desc = f"Resolve concept parsing expression for '{concept}'. {key_to_use=}"
with context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as sub_context:
if not concept.bnf: # to save a function call. Not sure it worth it.
if not concept.bnf: # 'if' is done outside to save a function call. Not sure it worth it.
BaseNodeParser.ensure_bnf(sub_context, concept, self.name)
grammar[key_to_use] = UnderConstruction(concept.id)
sheerka = context.sheerka
if concept.metadata.definition_type == DEFINITION_TYPE_BNF:
expression = concept.bnf
desc = f"Bnf concept detected. Resolving parsing expression '{expression}'"
with sub_context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as ssc:
ssc.add_inputs(expression=expression)
resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_update)
resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_skip, to_update)
ssc.add_values(return_values=resolved)
elif sheerka.isaset(context, concept):
@@ -1363,15 +1519,15 @@ class BnfNodeParser(BaseNodeParser):
ssc.add_inputs(concept=concept)
concepts_in_group = self.sheerka.get_set_elements(ssc, concept)
valid_concepts = []
for c in concepts_in_group:
if c.id == context.obj.id:
continue
if hasattr(context, "concepts_to_skip") and c.id in context.concepts_to_skip:
continue
valid_concepts.append(c)
valid_concepts = [c for c in concepts_in_group if c.id not in to_skip]
# for c in concepts_in_group:
# if c.id == context.obj.id:
# continue
#
# if hasattr(context, "concepts_to_skip") and c.id in context.concepts_to_skip:
# continue
#
# valid_concepts.append(c)
nodes = []
for c in valid_concepts:
@@ -1381,6 +1537,7 @@ class BnfNodeParser(BaseNodeParser):
resolved = self.resolve_parsing_expression(ssc,
UnOrderedChoice(*nodes),
grammar,
to_skip,
to_update)
ssc.add_values(concepts_in_group=concepts_in_group)
ssc.add_values(return_values=resolved)
@@ -1389,7 +1546,7 @@ class BnfNodeParser(BaseNodeParser):
desc = f"Concept is a simple concept."
with sub_context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as ssc:
expression = self.get_expression_from_concept_name(concept.name)
resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_update)
resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_skip, to_update)
grammar[key_to_use] = resolved
@@ -1400,7 +1557,7 @@ class BnfNodeParser(BaseNodeParser):
sub_context.add_values(return_values=resolved)
return resolved
def resolve_parsing_expression(self, context, expression, grammar, to_update):
def resolve_parsing_expression(self, context, expression, grammar, to_skip, to_update):
if isinstance(expression, str):
ret = StrMatch(expression, ignore_case=self.ignore_case)
@@ -1416,12 +1573,14 @@ class BnfNodeParser(BaseNodeParser):
unknown_concept = self.sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=concept)
return self.add_error(unknown_concept)
pe = self.resolve_concept_parsing_expression(
context,
concept,
expression.rule_name,
grammar,
to_update)
inner_to_skip = to_skip.copy()
inner_to_skip.add(concept.id)
pe = self.resolve_concept_parsing_expression(context,
concept,
expression.rule_name,
grammar,
inner_to_skip,
to_update)
if not isinstance(pe, (ParsingExpression, UnderConstruction)):
return pe # an error is detected, escalate it
@@ -1447,7 +1606,7 @@ class BnfNodeParser(BaseNodeParser):
ret = expression
ret.nodes = []
for e in ret.elements:
pe = self.resolve_parsing_expression(context, e, grammar, to_update)
pe = self.resolve_parsing_expression(context, e, grammar, to_skip, to_update)
if not isinstance(pe, (ParsingExpression, UnderConstruction)):
return pe # an error is detected, escalate it
if isinstance(pe, UnderConstruction):
@@ -1462,6 +1621,7 @@ class BnfNodeParser(BaseNodeParser):
expression.sep = self.resolve_parsing_expression(context,
expression.sep,
grammar,
to_skip,
to_update)
return ret
+10 -9
View File
@@ -5,8 +5,8 @@ from core.builtin_concepts import BuiltinConcepts
from core.sheerka.Sheerka import ExecutionContext
from core.tokenizer import Tokenizer, Token, TokenKind, LexerError
from parsers.BaseParser import BaseParser, ErrorNode, UnexpectedTokenErrorNode
from parsers.BnfNodeParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptExpression, \
StrMatch
from parsers.BnfNodeParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, \
ConceptExpression, StrMatch
@dataclass()
@@ -295,14 +295,15 @@ class BnfParser(BaseParser):
self.next_token()
if BnfParser.is_expression_a_set(self.context, expression):
root_concept = self.context.search(
start_with_self=True,
predicate=lambda ec: ec.action == BuiltinConcepts.INIT_BNF,
get_obj=lambda ec: ec.action_context,
stop=lambda ec: ec.action == BuiltinConcepts.INIT_BNF)
root_concept = self.context.search(start_with_self=True,
predicate=lambda ec: ec.action == BuiltinConcepts.INIT_BNF,
get_obj=lambda ec: ec.action_context,
stop=lambda ec: ec.action == BuiltinConcepts.INIT_BNF)
root_concept = list(root_concept)
if root_concept and hasattr(root_concept[0], "id"):
expression.recurse_id = f"{expression.concept.id}#{expression.rule_name}#{root_concept[0].id}"
expression.recurse_id = expression.get_recurse_id(root_concept[0].id,
expression.concept.id,
expression.rule_name)
return expression
@@ -313,7 +314,7 @@ class BnfParser(BaseParser):
@staticmethod
def update_recurse_id(context, concept_id, expression):
if BnfParser.is_expression_a_set(context, expression):
expression.recurse_id = f"{expression.concept.id}#{expression.rule_name}#{concept_id}"
expression.recurse_id = expression.get_recurse_id(concept_id, expression.concept.id, expression.rule_name)
for element in expression.elements:
BnfParser.update_recurse_id(context, concept_id, element)
+1 -1
View File
@@ -6,8 +6,8 @@ import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute
from core.tokenizer import LexerError, TokenKind
from parsers.BaseNodeParser import ConceptNode
from parsers.BaseParser import BaseParser, Node, ErrorNode
from parsers.BnfNodeParser import ConceptNode
log = logging.getLogger(__name__)
+13 -2
View File
@@ -1,7 +1,8 @@
from core.builtin_concepts import BuiltinConcepts
from core.sheerka.services.SheerkaExecute import SheerkaExecute
from parsers.BaseNodeParser import SourceCodeWithConceptNode
from parsers.BaseParser import BaseParser
from parsers.BnfNodeParser import ConceptNode
from parsers.BaseNodeParser import ConceptNode
from parsers.PythonParser import PythonParser
from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser
@@ -21,6 +22,16 @@ class PythonWithConceptsParser(BaseParser):
res += c if c.isalnum() else "0"
return res
@staticmethod
def get_nodes(nodes):
    """
    Iterate over the nodes, replacing every SourceCodeWithConceptNode
    by its first node, its inner nodes and its last node
    :param nodes: nodes to iterate over
    :return: generator of nodes
    """
    for current in nodes:
        if not isinstance(current, SourceCodeWithConceptNode):
            yield current
            continue

        yield current.first
        for inner in current.nodes:
            yield inner
        yield current.last
def parse(self, context, parser_input):
sheerka = context.sheerka
nodes = self.get_input_as_lexer_nodes(parser_input, unrecognized_nodes_parser)
@@ -63,7 +74,7 @@ class PythonWithConceptsParser(BaseParser):
identifiers[id(c)] = identifier
return identifier
for node in nodes:
for node in self.get_nodes(nodes):
if isinstance(node, ConceptNode):
source += node.source
if to_parse:
+1 -1
View File
@@ -9,7 +9,7 @@ class ShortTermMemoryParser(BaseParser):
"""
def __init__(self, **kwargs):
super().__init__("shortTermMemory", 85)
super().__init__("ShortTermMemory", 85)
def parse(self, context, parser_input):
"""
+26 -7
View File
@@ -125,6 +125,11 @@ class SyaConceptParserHelper:
return len(self.concept.concept.metadata.variables) == 0 and len(self.expected) == 0
def is_next(self, token):
"""
To match long named concepts
:param token:
:return:
"""
if self.is_matched() or len(self.expected) == 0:
return False
@@ -294,8 +299,9 @@ class InFixToPostFix:
else:
self.out.append(item)
# put the item to the list of awaiting parameters
self.parameters_list.append(item)
# put the item to the list of awaiting parameters only if it's not the end of function marker
if item != ")":
self.parameters_list.append(item)
if len(self._concepts()) > 0:
# try to predict the final position of the current concept
@@ -339,9 +345,18 @@ class InFixToPostFix:
self.unrecognized_tokens.add_token(token, parser_helper.start + i)
def get_errors(self):
def has_error(item):
    # a concept parser helper is in error when its own 'error' is set
    if isinstance(item, SyaConceptParserHelper):
        if item.error:
            return True

    # a source code with concepts is in error when one of its inner nodes has its 'error' set
    if isinstance(item, SourceCodeWithConceptNode):
        return any(getattr(inner, "error", False) for inner in item.nodes)

    return False
res = []
res.extend(self.errors)
res.extend([item for item in self.out if isinstance(item, SyaConceptParserHelper) and item.error])
res.extend([item for item in self.out if has_error(item)])
return res
def lock(self):
@@ -367,8 +382,8 @@ class InFixToPostFix:
if len(self.parameters_list) > parser_helper.expected_parameters_before_first_token:
# There are more parameters than needed by the new concept
# The others are either
# - parameters for the previous concept (if any)
# These others parameters are either
# - parameters for the previous suffixed concept (if any)
# - concepts on their own
# - syntax error
# In all the cases, the only thing that matter is to pop what is expected by the new concept
@@ -461,7 +476,7 @@ class InFixToPostFix:
"""
The unrecognized ends with an lpar '('
It means that its a function like foo(something)
The problem is that we need to know if there are other conceps before the function
The problem is that we need to know if there are other concepts before the function
ex : suffix one function(x)
suffix and one are not / may not be part of the name of the function
@@ -585,7 +600,7 @@ class InFixToPostFix:
del (current_concept.expected[0])
else:
# error
# We are not parsing the concept we tought we were parsing.
# We are not parsing the concept we thought we were parsing.
# Transform the eaten tokens into unrecognized
# and discard the current SyaConceptParserHelper
# TODO: manage the pending LPAR, RPAR ?
@@ -697,6 +712,10 @@ class InFixToPostFix:
for to_out in parsing_res.to_out:
instance._put_to_out(to_out)
# make sure to pop the current concept
if self._stack_isinstance(SyaConceptParserHelper):
self.pop_stack_to_out()
instance._put_to_out(")") # mark where the function should end
instance.stack.append(parsing_res.function)
instance.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, []) # reset unrecognized
+50 -11
View File
@@ -1,13 +1,13 @@
from dataclasses import dataclass
import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.builtin_helpers import only_successful, parse_unrecognized, get_lexer_nodes
from core.concept import Concept
from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode, SourceCodeWithConceptNode
from parsers.BaseParser import BaseParser, ErrorNode
from core.builtin_helpers import only_successful, parse_unrecognized, get_lexer_nodes
import core.utils
PARSERS = ["EmptyString", "AtomNode", "BnfNode", "SyaNode", "Python"]
PARSERS = ["EmptyString", "ShortTermMemory", "AtomNode", "BnfNode", "SyaNode", "Python"]
@dataclass()
@@ -64,7 +64,18 @@ class UnrecognizedNodeParser(BaseParser):
elif isinstance(node, SourceCodeNode):
sequences_found = core.utils.product(sequences_found, [node])
has_unrecognized = True # never trust source code not. I may be an invalid source code
has_unrecognized = True # to let PythonWithConceptParser validate the code
elif isinstance(node, SourceCodeWithConceptNode):
for i, n in [(i, n) for i, n in enumerate(node.nodes) if isinstance(n, ConceptNode)]:
res = self.validate_concept_node(context, n)
if not res.status:
self.add_error(res.body)
break
else:
node.nodes[i] = res.body
sequences_found = core.utils.product(sequences_found, [node])
has_unrecognized = True # to let PythonWithConceptParser validate the code
else: # cannot happen as of today :-)
raise NotImplementedError(f"Node is {type(node)}, which is not supported yet")
@@ -104,19 +115,47 @@ class UnrecognizedNodeParser(BaseParser):
:param concept:
:return:
"""
for name, value in concept.compiled.items():
if isinstance(value, Concept):
_validate_concept(value)
for k, v in concept.compiled.items():
if isinstance(v, Concept):
_validate_concept(v)
elif isinstance(value, UnrecognizedTokensNode):
res = parse_unrecognized(context, value.source, PARSERS)
elif isinstance(v, UnrecognizedTokensNode):
res = parse_unrecognized(context, v.source, PARSERS)
res = only_successful(context, res) # only key successful parsers
if res.status:
concept.compiled[name] = res.body.body
concept.compiled[k] = res.body.body
else:
errors.append(sheerka.new(BuiltinConcepts.ERROR, body=f"Cannot parse '{value.source}'"))
errors.append(sheerka.new(BuiltinConcepts.ERROR, body=f"Cannot parse '{v.source}'"))
def _get_source(compiled, var_name):
    # Give the source of the parser result stored for var_name.
    # None is returned unless the entry is a list with exactly one RETURN_VALUE
    # whose body is a PARSER_RESULT that is not named "parsers.ShortTermMemory"
    if var_name not in compiled:
        return None

    entry = compiled[var_name]
    if not isinstance(entry, list) or len(entry) != 1:
        return None

    return_value = entry[0]
    if not sheerka.isinstance(return_value, BuiltinConcepts.RETURN_VALUE):
        return None

    parser_result = return_value.body
    if not sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT):
        return None

    if parser_result.name == "parsers.ShortTermMemory":
        return None

    return parser_result.source
_validate_concept(concept_node.concept)
# Special case where the values of the variables are the names of the variable
# example : Concept("a plus b").def_var("a").def_var("b")
# and the user has entered 'a plus b'
# Chances are that we are talking about the concept itself, and not an instantiation (like '10 plus 2')
# This means that 'a' and 'b' don't have any real value
for name, value in concept_node.concept.metadata.variables:
if not _get_source(concept_node.concept.compiled, name) == name:
break
else:
concept_node.concept.metadata.is_evaluated = True
if len(errors) > 0:
return context.sheerka.ret(self.name, False, errors)
else: