# File: MyManagingTools/tests/helpers.py
# 768 lines, 24 KiB, Python

import dataclasses
import json
import re
from collections import OrderedDict
from typing import Any
import numpy
import pandas as pd
from bs4 import BeautifulSoup
from fastcore.basics import NotStr
from fastcore.xml import to_xml
from fasthtml.components import html2ft, Div, Span
# One path segment such as ``div#id[attr1=value1][attr2=value2]``: a tag name,
# an optional ``#id`` and any number of ``[name=value]`` filters.
pattern = r"""(?P<tag>\w+)(?:#(?P<id>[\w-]+))?(?P<attributes>(?:\[\w+=['"]?[\w_-]+['"]?\])*)"""
# A single ``[name=value]`` filter; the quotes around the value are optional.
attr_pattern = r"""\[(?P<name>\w+)=['"]?(?P<value>[\w_-]+)['"]?\]"""
# Captures the value of the ``name`` attribute in an ``svg name="..."`` fragment.
svg_pattern = r"""svg name="(\w+)\""""
# Pre-compiled versions of the three patterns above, shared by the helpers below.
compiled_pattern = re.compile(pattern)
compiled_attr_pattern = re.compile(attr_pattern)
compiled_svg_pattern = re.compile(svg_pattern)
@dataclasses.dataclass
class DoNotCheck:
desc: str = None
class EmptyElement:
    """
    Marker type for "an element without attributes and without children".
    The module-level ``Empty`` instance is the value ``matches`` tests by identity.
    """
@dataclasses.dataclass
class StartsWith:
    """
    Expected attribute value that only constrains the beginning of the actual value.
    """

    # Prefix the actual attribute value must start with.
    s: str
@dataclasses.dataclass
class Contains:
    """
    Expected attribute value that only requires a fragment to appear in the actual value.
    """

    # Fragment that must be found inside the actual attribute value.
    s: str
@dataclasses.dataclass
class JsonViewerNode:
is_expanded: bool | None
key: str
value: Any
debug_key: Any = None
debug_folding: Any = None
def find(self, path):
"""
Finds and returns a node in a hierarchical structure based on a dot-delimited path.
The method uses a recursive helper function to navigate through a tree-like
hierarchical node structure. Each node in the structure is assumed to potentially
have a "children" attribute, which is iterated to find matching keys in the path.
If, at any point, a node does not have the expected structure or the key is not
found within the children, the method will return None.
:param path: A dot-delimited string representing the hierarchical path to
the desired node (e.g., "root.child.subchild").
:return: The node in the hierarchy that matches the specified path or None
if no such node exists.
"""
def _find(node, path_parts):
if len(path_parts) == 0:
return node
element = node.value # to deal with ft element
if not hasattr(element, "children"):
return None
to_find = path_parts[0]
for child in element.children:
child_node = extract_jsonviewer_node(child)
if child_node is not None and child_node.key == to_find:
return _find(child_node, path_parts[1:])
return None
path_parts = path.split(".")
return _find(self, path_parts)
def text_value(self):
return str(self.value.children[0])
# Shared marker instance; ``matches`` compares against it by identity (``is Empty``).
Empty = EmptyElement()
@dataclasses.dataclass
class HTMLElement:
tag: str
attrs: dict
children: list['HTMLElement'] = dataclasses.field(default_factory=list)
text: str | None = None
def parse_element(element) -> HTMLElement:
    """
    Convert a BeautifulSoup element (and, recursively, its tag children) into an HTMLElement.

    :param element: Object exposing ``name``, ``attrs``, ``string`` and ``children``.
    :return: The equivalent HTMLElement tree.
    """
    # List-valued attributes are flattened into one space separated string
    flattened_attrs = {}
    for attr_name, attr_value in element.attrs.items():
        flattened_attrs[attr_name] = ' '.join(attr_value) if isinstance(attr_value, list) else attr_value

    node = HTMLElement(
        tag=element.name,
        attrs=flattened_attrs,
        text=element.string if element.string else None
    )

    # Children without a name are not tags (NavigableStrings): they are skipped
    node.children.extend(parse_element(sub) for sub in element.children if sub.name is not None)
    return node
def get_from_html(html_str, path=None, attrs=None):
    """
    Parse an HTML string and return either the whole tree or the first element found at ``path``.

    :param html_str: HTML source, parsed with BeautifulSoup's 'html.parser'.
    :param path: Optional dot-separated path handed to ``search_elements_by_path``.
    :param attrs: Optional attributes the element found at ``path`` must carry.
    :return: The root HTMLElement when ``path`` is None, otherwise the first match.
    :raises IndexError: When ``path`` is given and nothing matches.
    """
    root = parse_element(BeautifulSoup(html_str, 'html.parser'))
    if path is None:
        return root

    candidates = search_elements_by_path(root, path, attrs)
    return candidates[0]
def print_path(path):
    """
    Build the prefix used by the assertion messages to locate a failure.

    :param path: Path of the element being compared (may be empty or None).
    :return: "Path '<path>':" followed by a newline and a tab, or "" when there is no path.
    """
    if not path:
        return ""
    return f"Path '{path}':\n\t"
def get_path_attributes(path):
    """
    Decode one path segment of the form
    div#id[attr1=value1][attr2=value2]

    :param path: The segment to decode.
    :return: A dict holding 'tag', 'id' (when present) and one entry per
        [name=value] filter; an empty dict when the segment does not match.
    """
    found = compiled_pattern.match(path)
    if not found:
        return {}

    decoded = {'tag': found.group('tag')}

    element_id = found.group('id')
    if element_id:
        decoded['id'] = element_id

    # findall yields (name, value) pairs, which is exactly what dict.update expects
    decoded.update(compiled_attr_pattern.findall(found.group("attributes")))
    return decoded
def match_attrs(element_attrs, criteria_attrs):
    """
    Exact comparison: every (name, value) pair of the criteria must be present in the element.

    :param element_attrs: Attributes of the element.
    :param criteria_attrs: Required attributes; None or empty means "no constraint".
    :return: True when all criteria are satisfied.
    """
    if not criteria_attrs:
        return True

    available = element_attrs.items()
    for pair in criteria_attrs.items():
        if pair not in available:
            return False
    return True
def contains_attrs(element_attrs, criteria_attrs):
    """
    Loose comparison: every criteria name must exist on the element and its
    value must be contained (``in``) in the element's value.

    :param element_attrs: Attributes of the element.
    :param criteria_attrs: Required fragments; None or empty means "no constraint".
    :return: True when all criteria are satisfied.
    """
    if not criteria_attrs:
        return True

    for name, fragment in criteria_attrs.items():
        if name not in element_attrs:
            return False
        if fragment not in element_attrs[name]:
            return False
    return True
def search_elements_by_name(ft, tag: str = None, attrs: dict = None, comparison_method: str = "exact"):
    """
    Collect, in document order, all the elements that match the tag and / or the attributes.

    NotStr items are only collected when the requested tag is "notstr" (case insensitive).
    Lists and tuples are browsed item by item.

    :param ft: Element, NotStr, or list / tuple of them, to browse.
    :param tag: Tag to select; None accepts any tag.
    :param attrs: Attributes to select; None accepts any attributes.
    :param comparison_method: 'exact' or 'contains'
    :return: The list of matching items. When ``ft`` is a list and nothing
        matches, None is returned instead of an empty list.
    """
    attrs_ok = contains_attrs if comparison_method == "contains" else match_attrs

    def _collect(node, found):
        if isinstance(node, NotStr) and tag is not None and tag.lower() == "notstr":
            found.append(node)
            return

        if hasattr(node, "tag"):
            # the element itself first, then everything below it
            if (tag is None or node.tag == tag) and attrs_ok(node.attrs, attrs):
                found.append(node)
            for sub in node.children:
                _collect(sub, found)
            return

        if isinstance(node, (list, tuple)):
            for entry in node:
                _collect(entry, found)

    matching = []
    _collect(ft, matching)

    # historical behaviour: an unsuccessful search in a list gives None, not []
    if isinstance(ft, list) and not matching:
        return None
    return matching
def search_elements_by_path(ft, path: str, attrs: dict = None):
    """
    Selects elements that match a given path. The path is a dot-separated list of tags.
    Once the path is found, the optional attributes are compared against the last element's
    attributes.
    Note the path may not start at the root node of the tree structure.

    The ancestors are compared segment by segment, so a path such as "a.span"
    does not match a span whose parent tag merely ends with "a" (e.g. "data").

    :param ft: The root node of the tree structure to search within.
    :param path: Dot-separated string representing the path to match within the tree structure.
    :param attrs: Optional dictionary of attributes to match against the tree nodes. If not
        provided, no attribute filtering is applied.
    :return: A list of nodes matching the given path and attributes.
    """
    segments = path.split(".")
    tail = segments[-1]
    head = segments[:-1]

    def _ancestors_match(ancestors):
        # an empty head means the path is a single tag: any location is accepted
        if not head:
            return True
        return ancestors[-len(head):] == head

    def _find(current, ancestors):
        result = []
        if (current.tag == tail
                and _ancestors_match(ancestors)
                and match_attrs(current.attrs, attrs)):
            result.append(current)

        child_ancestors = ancestors + [current.tag]
        for child in current.children:
            if hasattr(child, "tag"):  # skip plain text children
                result.extend(_find(child, child_ancestors))
        return result

    return _find(ft, [])
def search_first_with_attribute(ft, tag, attribute):
    """
    Depth-first search of the first element that has the tag and defines the attribute.
    Only the presence of the attribute matters, not its value.
    When tag is None, the first element defining the attribute is returned.

    :param ft: Element to start from; anything without a ``tag`` gives None.
    :param tag: Tag to look for, or None for any tag.
    :param attribute: Name of the attribute that must be defined.
    :return: The first matching element, or None.
    :raises ValueError: When ``attribute`` is None.
    """
    if attribute is None:
        raise ValueError("Attribute must be provided to find an element.")

    if not hasattr(ft, "tag"):
        return None

    tag_accepted = tag is None or ft.tag == tag
    if tag_accepted and attribute in ft.attrs:
        return ft

    # lazily explore the children, stopping at the first truthy result
    below = (search_first_with_attribute(sub, tag, attribute) for sub in ft.children)
    return next((hit for hit in below if hit), None)
def find_first_match(ft, path: str):
    """
    Backtracking search of the first element reached by following the full path.
    Each part of the path may use #id and [attr=value],
    e.g. div#id[attr=value].div.span#id_2[class=class_2]
    returns the span#id_2 element if it exists.

    The first part is matched against ``ft`` itself (or the items of ``ft`` when
    it is a list / tuple), the following parts against the children.

    :param ft: Element, or list / tuple of elements, to start from.
    :param path: Dot-separated path.
    :return: The first matching element, or None.
    """

    def _matches(element, path_part):
        """Tell whether the element satisfies one part of the path."""
        if not hasattr(element, "attrs"):
            return False

        required = get_path_attributes(path_part)
        # the tag is handled like any other attribute
        available = element.attrs.copy() | {"tag": element.tag}
        for attr, value in required.items():
            if available.get(attr) != value:
                return False
        return True

    def _search(elements, path_parts):
        """Try every element for the first part, then go down for the remaining parts."""
        if not path_parts:
            return None

        first_part, *remaining = path_parts
        for element in elements:
            if not _matches(element, first_part):
                continue

            if not remaining:
                return element

            deeper = _search(element.children, remaining)
            if deeper is not None:
                return deeper
            # otherwise backtrack and try the next sibling

        return None

    roots = ft if isinstance(ft, (list, tuple)) else [ft]
    return _search(roots, path.split("."))
def matches(actual, expected, path=""):
    """
    Assert that ``actual`` satisfies the ``expected`` pattern.

    Only what ``expected`` declares is verified: additional attributes, and
    additional trailing children or list items, of ``actual`` are ignored.

    The rules are applied in this order:
    * ``actual`` is None while ``expected`` is not: failure
    * ``expected`` is a DoNotCheck: success, nothing is compared
    * ``expected`` is Empty: ``actual`` must have no attribute and no child
    * the types must be the same (numpy.int64 / numpy.float64 count as int / float),
      unless both sides expose a ``tag``
    * list / tuple: items are compared pairwise
    * NotStr: ``actual.s``, without its leading whitespace, must start with ``expected.s``
    * elements with a ``tag``: same tag, every expected attribute present and equal
      (or satisfying StartsWith / Contains), children compared pairwise
    * anything else: plain equality

    :param actual: The value produced by the code under test.
    :param expected: The pattern it must satisfy.
    :param path: Location of ``actual`` in the tree, used in the failure messages.
    :return: True when everything matches.
    :raises AssertionError: On the first difference found.
    """

    def _type(x):
        # numpy scalars are compared as their Python counterpart
        if isinstance(x, numpy.int64):
            return int
        elif isinstance(x, numpy.float64):
            return float
        return type(x)

    def _debug(_actual, _expected):
        str_actual = _debug_print_actual(_actual, _expected, "", 3)
        str_expected = _debug_print_expected(_expected, "", 2)
        return f"\nactual={str_actual}\nexpected={str_expected}"

    def _debug_value(x):
        if x in ("** NOT FOUND **", "** NONE **", "** NO MORE CHILDREN **"):
            return x
        elif isinstance(x, str):
            return f"'{x}'" if "'" not in x else f'"{x}"'
        else:
            return x

    def _debug_print_actual(_actual, _expected, indent, max_level):
        # debug print both actual and expected, showing only expected elements
        if max_level == 0:
            return ""

        if _actual is None:
            return f"{indent}** NONE **"

        if not hasattr(_actual, "tag") or not hasattr(_expected, "tag"):
            return f"{indent}{_actual}"

        str_actual = f"{indent}({_actual.tag}"
        first_attr = True
        for attr in _expected.attrs:
            comma = " " if first_attr else ", "
            str_actual += f"{comma}{attr}={_debug_value(_actual.attrs.get(attr, '** NOT FOUND **'))}"
            first_attr = False

        if len(_expected.children) == 0 and len(_actual.children) and max_level > 1:
            # force recursion to see sub levels
            for _actual_child in _actual.children:
                str_child_a = _debug_print_actual(_actual_child, _actual_child, indent + " ", max_level - 1)
                str_actual += "\n" + str_child_a if str_child_a else ""
        else:
            for index, _expected_child in enumerate(_expected.children):
                if len(_actual.children) > index:
                    _actual_child = _actual.children[index]
                else:
                    _actual_child = "** NO MORE CHILDREN **"
                str_child_a = _debug_print_actual(_actual_child, _expected_child, indent + " ", max_level - 1)
                str_actual += "\n" + str_child_a if str_child_a else ""

        str_actual += ")"
        return str_actual

    def _debug_print_expected(_expected, indent, max_level):
        if max_level == 0:
            return ""

        if _expected is None:
            return f"{indent}** NONE **"

        if not hasattr(_expected, "tag"):
            return f"{indent}{_expected}"

        str_expected = f"{indent}({_expected.tag}"
        first_attr = True
        for attr in _expected.attrs:
            comma = " " if first_attr else ", "
            str_expected += f"{comma}{attr}={_expected.attrs[attr]}"
            first_attr = False

        for _expected_child in _expected.children:
            str_child_e = _debug_print_expected(_expected_child, indent + " ", max_level - 1)
            str_expected += "\n" + str_child_e if str_child_e else ""

        str_expected += ")"
        return str_expected

    if actual is None and expected is not None:
        assert False, f"{print_path(path)}actual is None !"

    if isinstance(expected, DoNotCheck):
        return True

    if expected is Empty:
        assert actual.attrs == {}, f"Empty element expected, but found attributes {actual.attrs}."
        assert len(actual.children) == 0, f"Empty element expected, but found children {actual.children}."
        return True

    assert _type(actual) == _type(expected) or (hasattr(actual, "tag") and hasattr(expected, "tag")), \
        f"{print_path(path)}The types are different: {type(actual)} != {type(expected)}{_debug(actual, expected)}."

    if isinstance(expected, (list, tuple)):
        assert len(actual) >= len(expected), \
            f"{print_path(path)}Some required elements are missing: {len(actual)=} < {len(expected)}, \n{_debug(actual, expected)}."

        for actual_child, expected_child in zip(actual, expected):
            # the path is forwarded so that nested failures keep their location
            matches(actual_child, expected_child, path)

    elif isinstance(expected, NotStr):
        # computed outside the f-string: a backslash inside an f-string
        # expression is a syntax error before Python 3.12
        actual_text = actual.s.lstrip('\n')
        to_compare = actual_text.lstrip()
        assert to_compare.startswith(expected.s), \
            f"{print_path(path)}NotStr are different: '{actual_text}' != '{expected.s}'."

    elif hasattr(actual, "tag"):
        assert actual.tag == expected.tag, \
            f"{print_path(path)}The elements are different: '{actual.tag}' != '{expected.tag}'."

        # tag are the same, I can update it and be up to date when attr comparison fails
        path = path + "." + actual.tag if path else actual.tag
        if "id" in actual.attrs:
            path += f"#{actual.attrs['id']}"
        elif "name" in actual.attrs:
            path += f"[name={actual.attrs['name']}]"
        elif "class" in actual.attrs:
            path += f"[class={actual.attrs['class']}]"

        # only test the attributes referenced by the expected
        for expected_attr in expected.attrs:
            assert expected_attr in actual.attrs, \
                f"{print_path(path)}Attribute '{expected_attr}' is not found (with expected value: '{expected.attrs[expected_attr]}'). actual='{actual.attrs}'."

            if isinstance(expected.attrs[expected_attr], StartsWith):
                assert actual.attrs[expected_attr].startswith(expected.attrs[expected_attr].s), \
                    f"{print_path(path)}Attribute '{expected_attr}' does not start with '{expected.attrs[expected_attr].s}': actual='{actual.attrs[expected_attr]}', expected ='{expected.attrs[expected_attr].s}'."

            elif isinstance(expected.attrs[expected_attr], Contains):
                assert expected.attrs[expected_attr].s in actual.attrs[expected_attr], \
                    f"{print_path(path)}Attribute '{expected_attr}' does not contain '{expected.attrs[expected_attr].s}': actual='{actual.attrs[expected_attr]}', expected ='{expected.attrs[expected_attr].s}'."

            else:
                assert actual.attrs[expected_attr] == expected.attrs[expected_attr], \
                    f"{print_path(path)}The values are different for '{expected_attr}' : '{actual.attrs[expected_attr]}' != '{expected.attrs[expected_attr]}'."

        if len(expected.children) > 0 and expected.children[0] is Empty:
            matches(actual, expected.children[0], path)
        else:
            # hack to manage ft and Html object different behaviour
            if len(actual.children) == 0 and len(expected.children) == 1 and expected.children[0] == NotStr(""):
                pass
            else:
                assert len(actual.children) >= len(expected.children), \
                    f"{print_path(path)}Some required elements are missing: len(actual)={len(actual.children)} < len(expected)={len(expected.children)}{_debug(actual, expected)}."

                for actual_child, expected_child in zip(actual.children, expected.children):
                    matches(actual_child, expected_child, path)

    else:
        assert actual == expected, \
            f"{print_path(path)}The values are not the same: '{actual}' != '{expected}'."

    return True
def contains(lst, element, recursive=False):
    """
    Tell whether at least one item of the list satisfies the element pattern,
    according to the matches() function.

    :param lst: Items to test.
    :param element: Pattern the items are compared with.
    :param recursive: When True, the children of the items are searched as well.
    :return: True when a matching item is found, False otherwise
        (including when ``lst`` is empty or None).
    """
    if not lst:
        return False

    for candidate in lst:
        # matches() reports a mismatch through an AssertionError
        try:
            is_match = matches(candidate, element)
        except AssertionError:
            is_match = False

        if is_match:
            return True

        if not recursive:
            continue

        # depth-first: look below this candidate before trying the next one
        if hasattr(candidate, "children") and candidate.children:
            if contains(candidate.children, element, recursive=True):
                return True

    return False
def get_selected(return_elements):
    """
    Decode the selection described by the first element whose id starts with 'tsm_'.

    Each child of that element must define the attributes 'selection-type' and
    'element-id'. Depending on the selection type, the child is decoded as:
    * starts with "cell": (selection_type, int, int), the last two '-' separated
      parts of the element id
    * "row": ("row", int), the last '-' separated part of the element id
    * "column": ("column", element_id)

    :param return_elements: List of elements to look into.
    :return: The list of decoded selections, in children order.
    :raises AssertionError: When ``return_elements`` is not a list or when no
        element has an id starting with 'tsm_'.
    :raises NotImplementedError: When a child has an unsupported selection type.
    """
    assert isinstance(return_elements, list), "result must be a list"

    for element in return_elements:
        if hasattr(element, "id") and element.id.startswith("tsm_"):
            break
    else:
        raise AssertionError("No element with id 'tsm_' found in the return elements")

    res = []
    for child in element.children:
        selection_type = child.attrs["selection-type"]
        if selection_type.startswith("cell"):
            split = child.attrs["element-id"].split("-")
            selected = (selection_type, int(split[-2]), int(split[-1]))
        elif selection_type == "row":
            split = child.attrs["element-id"].split("-")
            selected = ("row", int(split[-1]))
        elif selection_type == "column":
            selected = ("column", child.attrs["element-id"])
        else:
            # NotImplemented is a constant, not an exception: calling it raises a TypeError
            raise NotImplementedError(f"Unsupported selection type '{selection_type}'")

        res.append(selected)

    return res
def get_context_menu(return_elements):
    """
    Decode the entries of the context menus found in the returned elements.

    A context menu is an element whose id starts with 'cmcm_' or 'cmrm_'.
    Each of its children defining 'hx-post' gives one entry made of:
    * "hx-post": the url without its first path component
    * "data_tooltip": the value of the 'data-tooltip' attribute
    * the content of the 'hx-vals' json attribute, when defined, except "g_id"

    :param return_elements: List of elements to look into.
    :return: The list of entries, as dictionaries.
    :raises AssertionError: When ``return_elements`` is not a list or when it
        holds no context menu.
    """
    assert isinstance(return_elements, list), "result must be a list"

    menu_prefixes = ("cmcm_", "cmrm_")
    menu_found = False
    entries = []

    for element in return_elements:
        if not (hasattr(element, "id") and element.id[:5] in menu_prefixes):
            continue

        menu_found = True
        for child in element.children:
            if "hx-post" not in child.attrs:
                continue

            url_parts = child.attrs["hx-post"].split("/")
            entry = {
                "hx-post": "/" + "/".join(url_parts[2:]),
                "data_tooltip": child.attrs["data-tooltip"],
            }

            if "hx-vals" in child.attrs:
                for key, value in json.loads(child.attrs["hx-vals"]).items():
                    if key != "g_id":
                        entry[key] = value

            entries.append(entry)

    assert menu_found, "No element with id 'cmcm_' found in the return elements"
    return entries
def debug_print(ft, attr1st=False):
    """
    Serialise ``ft`` with ``to_xml`` and convert the result with ``html2ft``.

    :param ft: The element to print.
    :param attr1st: Forwarded as is to ``html2ft``.
    :return: Whatever ``html2ft`` returns for the serialised element.
    """
    serialised = to_xml(ft)
    return html2ft(serialised, attr1st=attr1st)
def extract_table_values(element, header=True):
    """
    Extract the values of a table rendered with the 'dt-*' classes.

    :param element: Element containing the table.
    :param header: When True, the header row is read and the values are grouped by column title.
    :return: With header, an OrderedDict {column title: [values]};
        without header, a list of rows, each row being the list of its cell values.
    """

    def _value_of(node):
        # first child when there is one, text otherwise
        return node.children[0] if len(node.children) > 0 else node.text

    if not header:
        body = search_elements_by_name(element, attrs={"class": "dt-body"})[0]
        rows = []
        for row in body.children:
            values = []
            for col in row.children:
                contents = search_elements_by_name(col, attrs={"name": "dt-cell-content"})
                # cells without content element are skipped
                if len(contents) > 0:
                    values.append(_value_of(contents[0]))
            rows.append(values)
        return rows

    # first, get the header
    header_row = search_elements_by_name(element, attrs={"class": "dt-row dt-header"})[0]
    title_by_col = {}
    columns = OrderedDict()
    for header_cell in header_row.children:
        col_index = header_cell.attrs["data-col"]
        title_element = search_elements_by_name(header_cell, attrs={"name": "dt-header-title"})[0]
        title = _value_of(title_element)
        title_by_col[col_index] = title
        columns[title] = []

    # then dispatch every cell of the body to its column
    body = search_elements_by_name(element, attrs={"class": "dt-body"})[0]
    for row in body.children:
        for col in row.children:
            col_index = col.attrs["data-col"]
            content = search_elements_by_name(col, attrs={"name": "dt-cell-content"})[0]
            columns[title_by_col[col_index]].append(_value_of(content))

    return columns
def extract_table_values_new(ft, header=True):
    """
    Extract the values of a table rendered with the 'dt2-*' classes.

    The value of a cell is its first 'data-tooltip' attribute when there is one.
    Otherwise, for checkboxes, it is decoded from the name of the svg icon:
    "checked" gives True, "unchecked" gives False. Anything else gives None.

    :param ft: Element containing the table.
    :param header: When True, the header is read and the values are grouped by column title.
    :return: With header, an OrderedDict {column title: [values]};
        without header, a list of rows, each row being the list of its cell values.
    """
    checkbox_values = {"checked": True, "unchecked": False}

    def _get_cell_content_value(cell_element):
        # try using data-tooltip
        tooltip_element = search_first_with_attribute(cell_element, None, "data-tooltip")
        if tooltip_element is not None:
            return tooltip_element.attrs["data-tooltip"]

        # for checkboxes, use the name of the NotStr element
        svg_element = search_elements_by_name(cell_element, "NotStr")
        if svg_element:
            match = compiled_svg_pattern.search(svg_element[0].s)
            if match:
                return checkbox_values.get(match.group(1))

        return None

    if header:
        # first, get the header
        header_element = search_elements_by_name(ft, attrs={"class": "dt2-header"}, comparison_method='contains')[0]
        header_map = {}
        res = OrderedDict()
        for header_cell in header_element.children:
            col_id = header_cell.attrs["data-col"]
            title = header_cell.attrs["data-tooltip"]
            header_map[col_id] = title
            res[title] = []

        body = search_elements_by_name(ft, attrs={"class": "dt2-body"}, comparison_method='contains')[0]
        for row in body.children:
            for col in row.children:
                col_id = col.attrs["data-col"]
                res[header_map[col_id]].append(_get_cell_content_value(col))

        return res

    # same 'contains' lookup as the header branch, so that a body carrying
    # additional classes is found in both modes
    body = search_elements_by_name(ft, attrs={"class": "dt2-body"}, comparison_method='contains')[0]
    res = []
    for row in body.children:
        row_values = []
        for col in row.children:
            contents = search_elements_by_name(col,
                                               attrs={"class": "dt2-cell-content"},
                                               comparison_method="contains")
            # the content elements are decoded one by one: search_first_with_attribute
            # only accepts a single element, handing it the whole list would
            # silently ignore every data-tooltip
            cell_value = None
            for content in contents:
                cell_value = _get_cell_content_value(content)
                if cell_value is not None:
                    break
            row_values.append(cell_value)
        res.append(row_values)

    return res
def extract_footer_values(element):
    """
    Extract the values of the table footer (class 'dt-table-footer').

    :param element: Element containing the table.
    :return: A list of rows, each row being the list of its cell values
        (first child of the 'dt-cell-content' element, or its text when it has no child).
    """
    footer = search_elements_by_name(element, attrs={"class": "dt-table-footer"})[0]

    rows = []
    for row in footer.children:
        values = []
        for col in row.children:
            content = search_elements_by_name(col, attrs={"name": "dt-cell-content"})[0]
            if len(content.children) > 0:
                values.append(content.children[0])
            else:
                values.append(content.text)
        rows.append(values)

    return rows
def extract_popup_content(element, filter_input=True) -> OrderedDict:
    """
    Extract the checkboxes of the filter popup and whether they are checked.

    :param element: Element containing the popup.
    :param filter_input: When True, the value of the filter input is added
        under the key "__filter_input__" ('' when it has no value).
    :return: OrderedDict {checkbox value: True when the 'checked' attribute is defined}.
    """
    content = OrderedDict()

    if filter_input:
        filter_element = search_elements_by_name(element, attrs={"name": "dt-popup-filter-input"})[0]
        content["__filter_input__"] = _get_element_value(filter_element) or ''

    checkboxes_container = search_elements_by_name(element, attrs={"class": 'dt-filter-popup-content'})[0]
    for checkbox in search_elements_by_name(checkboxes_container, attrs={"type": "checkbox"}):
        content[checkbox.attrs['value']] = 'checked' in checkbox.attrs

    return content
def extract_jsonviewer_node(element):
    """
    Decode a rendered JSON viewer node into a JsonViewerNode.

    A node is made of exactly 3 children:
    1st : Span(NotStr(name="expanded|collapse")) or None
    2nd : Span("key : ") or None (None is possible only for the root node)
    3rd : Span(value)

    :param element: The element to decode.
    :return: The JsonViewerNode, or None when the element does not have exactly 3 children.
    :raises AssertionError: When the key element is defined but is not a span.
    """
    if not hasattr(element, "children") or len(element.children) != 3:
        return None

    debug_folding, debug_key, value = element.children

    # the folding state is deduced from the icon, None when no icon is recognised
    is_expanded = None
    if contains([debug_folding], span_icon("expanded")):
        is_expanded = True
    elif contains([debug_folding], span_icon("collapsed")):
        is_expanded = False

    if debug_key is None:
        key = None
    else:
        assert hasattr(debug_key, "tag") and debug_key.tag == "span", "debug_key must be a span"
        # the span holds "key : ", only what precedes the separator is kept
        key = debug_key.children[0].split(" : ")[0]

    return JsonViewerNode(
        is_expanded=is_expanded,
        key=key,
        value=value,
        debug_key=debug_key,
        debug_folding=debug_folding,
    )
def to_array(dataframe: pd.DataFrame) -> list:
    """
    Convert a dataframe into a list of rows.

    :param dataframe: The dataframe to convert.
    :return: One list per row, holding the values of the row in column order.
    """
    rows = []
    for _, row in dataframe.iterrows():
        rows.append(list(row))
    return rows
def _get_element_value(element):
return element.children[0] if len(element.children) > 0 else element.text
def icon(name: str):
    """
    Build the expected pattern for an icon.
    The pattern is a NotStr holding only the beginning of the svg markup.

    :param name: Name of the icon.
    :return: NotStr('<svg name="<name>"')
    """
    svg_start = f'<svg name="{name}"'
    return NotStr(svg_start)
def div_icon(name: str):
    """
    Build the expected pattern for an icon wrapped in a div.

    :param name: Name of the icon.
    :return: Div(NotStr('<svg name="<name>"'))
    """
    svg_start = NotStr(f'<svg name="{name}"')
    return Div(svg_start)
def span_icon(name: str):
    """
    Build the expected pattern for an icon wrapped in a span.

    :param name: Name of the icon.
    :return: Span(NotStr('<svg name="<name>"'))
    """
    svg_start = NotStr(f'<svg name="{name}"')
    return Span(svg_start)
def div_ellipsis(text: str):
    """
    Build the expected pattern for a truncated text: a div with the class
    "truncate" whose content and data_tooltip are both the text.

    :param text: The text.
    :return: Div(text, cls="truncate", data_tooltip=text)
    """
    expected_attrs = {"cls": "truncate", "data_tooltip": text}
    return Div(text, **expected_attrs)