Files
Sheerka-Old/tests/parsers/parsers_utils.py
T

151 lines
5.9 KiB
Python

from core.concept import CC, Concept
from core.tokenizer import Tokenizer, TokenKind, Token
from parsers.BaseNodeParser import scnode, utnode, cnode, SCWC, CNC, short_cnode, SourceCodeWithConceptNode, CN, UTN
from parsers.SyaNodeParser import SyaConceptParserHelper
def _index(tokens, expr, index):
    """
    Locates the tokens of an expression inside a bigger list of token values

    expr is tokenized (its EOF token is dropped) and the resulting token values are searched
    as a contiguous run inside tokens. Candidate positions are tried one by one from the left.

    :param tokens: list of token values to search in
    :param expr: source text whose token values are looked for
    :param index: number of matching occurrences to skip before returning one
    :return: tuple (position of the first matching token, number of tokens matched)
    :raises ValueError: when the requested occurrence cannot be found
    """
    needle = [tok.value for tok in Tokenizer(expr) if tok.type != TokenKind.EOF]
    width = len(needle)
    remaining = index  # occurrences still to skip

    for start in range(len(tokens) - width + 1):
        # reject this position as soon as one token differs
        if any(tokens[start + offset] != wanted for offset, wanted in enumerate(needle)):
            continue

        if remaining == 0:
            return start, width
        remaining -= 1

    raise ValueError(f"substring '{expr}' not found")
def compute_debug_array(res):
    """
    Builds a comparable, text only version of the debug information of each result

    For every item of res, its debug attribute is walked:
    * a Token contributes its value, except whitespace tokens which are dropped
    * anything else contributes the name of its concept

    :param res: iterable of objects exposing a debug attribute
    :return: one list of strings per item of res, in the same order
    """

    def _label(item):
        # Token -> its value, otherwise -> name of the attached concept
        return item.value if isinstance(item, Token) else item.concept.name

    def _is_whitespace(item):
        return isinstance(item, Token) and item.type == TokenKind.WHITESPACE

    return [
        [_label(item) for item in entry.debug if not _is_whitespace(item)]
        for entry in res
    ]
def get_node(concepts_map, expression_as_tokens, sub_expr, concept_key=None, skip=0, is_bnf=False, sya=False):
    """
    Turns one item of an expected result into a node positioned inside the tokenized expression

    The treatment depends on what sub_expr is (checked in this order):
    * the literal "')'" is returned as ")"
    * scnode / utnode are returned untouched
    * cnode is returned untouched, unless its concept_key starts with "#": the rest of the key is
      then looked up in concepts_map and a new cnode carrying the mapped concept's key is returned
    * SCWC is rebuilt as a SourceCodeWithConceptNode from its resolved first / last / content
    * CNC, CC and CN are updated in place (concept, concept_key, position, compiled values, source)
    * UTN gets its position fixed from the node found for its source
    * short_cnode is resolved from its source, with is_bnf forced to True
    * a tuple is read as (sub expression, number of occurrences to skip)
    * anything else is searched as text in expression_as_tokens

    :param concepts_map: hash of the known concepts
    :param expression_as_tokens: full expression, as a list of token values
    :param sub_expr: sub expression to search in the full expression
    :param concept_key: key of the concept if different from sub_expr
    :param skip: number of occurrences of sub_expr to skip
    :param is_bnf: True if the concept to search is a bnf definition
    :param sya: return a SyaConceptParserHelper instead of a ConceptNode when needed
    :return: the resolved node
    :raises ValueError: (from _index) when the text is not found in expression_as_tokens
    """
    if sub_expr == "')'":
        return ")"

    # NOTE: the order of the type checks below is significant, do not reorder them
    if isinstance(sub_expr, (scnode, utnode)):
        return sub_expr

    if isinstance(sub_expr, cnode):
        key = sub_expr.concept_key
        if not key.startswith("#"):
            return sub_expr
        # "#xxx" means : use the key of the concept registered as "xxx" in concepts_map
        return cnode(concepts_map[key[1:]].key, sub_expr.start, sub_expr.end, sub_expr.source)

    if isinstance(sub_expr, SCWC):
        first = get_node(concepts_map, expression_as_tokens, sub_expr.first, sya=sya)
        last = get_node(concepts_map, expression_as_tokens, sub_expr.last, sya=sya)
        content = []
        for item in sub_expr.content:
            content.append(get_node(concepts_map, expression_as_tokens, item, sya=sya))
        return SourceCodeWithConceptNode(first, last, content).pseudo_fix_source()

    if isinstance(sub_expr, (CNC, CC, CN)):
        return _resolve_concept_wrapper(concepts_map, expression_as_tokens, sub_expr, sya)

    if isinstance(sub_expr, UTN):
        # only the position is needed here
        located = get_node(concepts_map, expression_as_tokens, sub_expr.source)
        sub_expr.fix_pos(located)
        return sub_expr

    if isinstance(sub_expr, short_cnode):
        return get_node(concepts_map, expression_as_tokens, sub_expr.source,
                        concept_key=sub_expr.concept_key, skip=skip, is_bnf=True, sya=sya)

    if isinstance(sub_expr, tuple):
        # (sub expression, number of occurrences to skip)
        return get_node(concepts_map, expression_as_tokens, sub_expr[0],
                        concept_key=concept_key, skip=sub_expr[1], is_bnf=is_bnf, sya=sya)

    return _node_from_text(concepts_map, expression_as_tokens, sub_expr, concept_key, skip, is_bnf, sya)


def _resolve_concept_wrapper(concepts_map, expression_as_tokens, sub_expr, sya):
    """
    Fills a CNC / CC / CN in place with the concept and the positions found in the expression

    :param concepts_map: hash of the known concepts
    :param expression_as_tokens: full expression, as a list of token values
    :param sub_expr: the CNC, CC or CN to update
    :param sya: forwarded to get_node()
    :return: sub_expr, once updated
    """
    concept_node = get_node(concepts_map,
                            expression_as_tokens,
                            sub_expr.source or sub_expr.concept_key,
                            concept_key=sub_expr.concept_key,
                            sya=sya)

    concept = concept_node.concept
    sub_expr.concept_key = concept.key
    sub_expr.concept = concept

    # some nodes have no 'end', the position is then reduced to the start
    begin = concept_node.start
    end = concept_node.end if hasattr(concept_node, "end") else begin
    sub_expr.fix_pos((begin, end))

    if hasattr(sub_expr, "compiled"):
        for name, raw in sub_expr.compiled.items():
            # always resolved, as the start and end positions are needed
            located = get_node(concepts_map, expression_as_tokens, raw, sya=sya)
            if isinstance(raw, str) and raw in concepts_map:
                sub_expr.compiled[name] = CC(Concept().update_from(concepts_map[raw]))
            else:
                sub_expr.compiled[name] = located
            sub_expr.fix_pos(located)

    if hasattr(sub_expr, "fix_source"):
        sub_expr.fix_source(expression_as_tokens[sub_expr.start: sub_expr.end + 1])

    return sub_expr


def _node_from_text(concepts_map, expression_as_tokens, text, concept_key, skip, is_bnf, sya):
    """
    Searches text in the expression and wraps what is found into the matching kind of node

    :param concepts_map: hash of the known concepts
    :param expression_as_tokens: full expression, as a list of token values
    :param text: sub expression to search
    :param concept_key: key of the concept if different from text
    :param skip: number of occurrences of text to skip
    :param is_bnf: True if the concept to search is a bnf definition
    :param sya: return a SyaConceptParserHelper instead of a ConceptNode when needed
    :return: scnode, CN, SyaConceptParserHelper or utnode
    """
    first, count = _index(expression_as_tokens, text, skip)
    last = first + count - 1

    # special case of python source code
    if "+" in text and text.strip() != "+":
        return scnode(first, last, text)

    # try to match one of the concepts from the map
    template = concepts_map.get(concept_key or text, None)
    if not template:
        # nothing known under that key : UnrecognizedTokensNode
        return utnode(first, last, text)

    concept = Concept().update_from(template)  # make a copy when massively used in tests
    if sya and len(concept.metadata.props) != 0 and not is_bnf:
        # not an atom : a ParserHelper is expected
        return SyaConceptParserHelper(concept, first)

    # atom (or sya not requested, or bnf) : ConceptNode
    return CN(concept, first, last, source=text)
def compute_expected_array(concepts_map, expression, expected, sya=False):
    """
    Computes a simple but sufficient version of the result of infix_to_postfix()

    The expression is tokenized once (EOF token excluded), then every item of expected
    is resolved against these token values with get_node().

    :param concepts_map: hash of the known concepts
    :param expression: source text of the full expression
    :param expected: items to resolve, in the expected order
    :param sya: if true, generate an SyaConceptParserHelper instead of a cnode
    :return: list of the resolved nodes, one per item of expected
    """
    token_values = []
    for tok in Tokenizer(expression):
        if tok.type != TokenKind.EOF:
            token_values.append(tok.value)

    nodes = []
    for item in expected:
        nodes.append(get_node(concepts_map, token_values, item, sya=sya))
    return nodes