import dataclasses
import json
import re
from collections import OrderedDict
from typing import Any

import numpy
import pandas as pd
from bs4 import BeautifulSoup
from fastcore.basics import NotStr
from fastcore.xml import to_xml
from fasthtml.components import html2ft, Div, Span

# Path-part grammar: ``tag#id[attr1=value1][attr2=value2]``.
# The group names are required by get_path_attributes(), which reads
# match.group('tag'), match.group('id') and match.group('attributes').
pattern = r"""(?P<tag>\w+)(?:#(?P<id>[\w-]+))?(?P<attributes>(?:\[\w+=['"]?[\w_-]+['"]?\])*)"""
# One ``[name=value]`` pair; findall() yields (name, value) tuples.
attr_pattern = r"""\[(?P<name>\w+)=['"]?(?P<value>[\w_-]+)['"]?\]"""
# Captures the icon name out of a NotStr svg string.
svg_pattern = r"""svg name="(\w+)\""""

compiled_pattern = re.compile(pattern)
compiled_attr_pattern = re.compile(attr_pattern)
compiled_svg_pattern = re.compile(svg_pattern)


@dataclasses.dataclass
class DoNotCheck:
    """
    Marker used as an expected value: matches() returns True without comparing anything.
    """
    desc: str | None = None


class EmptyElement:
    """
    Marker type for the ``Empty`` singleton: matches() requires no attributes and no children.
    """
    pass


@dataclasses.dataclass
class StartsWith:
    """
    To check if the attribute starts with a specific value
    """
    s: str


@dataclasses.dataclass
class Contains:
    """
    To check if the attribute contains a specific value
    """
    s: str


@dataclasses.dataclass
class JsonViewerNode:
    """
    Decoded view of one json viewer node, as produced by extract_jsonviewer_node().
    """
    is_expanded: bool | None
    key: str
    value: Any
    debug_key: Any = None
    debug_folding: Any = None

    def find(self, path):
        """
        Finds and returns a descendant node based on a dot-delimited path.

        Each path part is compared with the key of the nodes decoded (with
        extract_jsonviewer_node) from the children of the current node's value.
        The first part is therefore looked up among the children of this node,
        not against this node's own key.

        :param path: A dot-delimited string of keys (e.g., "child.subchild").
        :return: The matching node, or None when a key is not found or when a
                 value on the way has no "children" attribute.
        """

        def _find(node, path_parts):
            if len(path_parts) == 0:
                return node

            element = node.value  # to deal with ft element
            if not hasattr(element, "children"):
                return None

            to_find = path_parts[0]
            for child in element.children:
                child_node = extract_jsonviewer_node(child)
                if child_node is not None and child_node.key == to_find:
                    # the first child carrying the key wins, there is no backtracking
                    return _find(child_node, path_parts[1:])

            return None

        return _find(self, path.split("."))

    def text_value(self):
        """
        :return: the first child of the value, as a string
        """
        return str(self.value.children[0])


Empty = EmptyElement()


@dataclasses.dataclass
class HTMLElement:
    """
    Minimal tree node built from parsed html: tag, attributes, element children and text.
    """
    tag: str
    attrs: dict
    children: list['HTMLElement'] = dataclasses.field(default_factory=list)
    text: str | None = None


# Function to transform BeautifulSoup elements into the HTMLElement class
def parse_element(element) -> HTMLElement:
    """
    Recursively convert a BeautifulSoup element into an HTMLElement tree.

    :param element: object exposing ``name``, ``attrs``, ``string`` and ``children``
    :return: the HTMLElement mirroring ``element``; children without a name are skipped
    """

    def _process_attributes(attrs):
        # list values (multi valued attributes) are flattened to a space separated string
        return {key: ' '.join(value) if isinstance(value, list) else value
                for key, value in attrs.items()}

    # Create an HTMLElement object for the current element
    html_element = HTMLElement(
        tag=element.name,
        attrs=_process_attributes(element.attrs),
        text=element.string if element.string else None
    )

    # Recursively parse and add child elements
    for child in element.children:
        if child.name is not None:  # Only process tags, ignore NavigableStrings
            html_element.children.append(parse_element(child))

    return html_element


def get_from_html(html_str, path=None, attrs=None):
    """
    Parse an html string and return the whole tree, or the first element found at ``path``.

    :param html_str: html source, parsed with the 'html.parser' backend
    :param path: optional dot-separated path, see search_elements_by_path()
    :param attrs: optional attributes the element at ``path`` must carry
    :return: the root HTMLElement when ``path`` is None, else the first match
    :raises IndexError: when ``path`` is given and nothing matches
    """
    soup = BeautifulSoup(html_str, 'html.parser')
    element = parse_element(soup)
    return element if path is None else search_elements_by_path(element, path, attrs)[0]


def print_path(path):
    """
    :return: the prefix used in assertion messages, or an empty string when path is empty
    """
    return f"Path '{path}':\n\t" if path else ""


def get_path_attributes(path):
    """
    Get the attributes from div#id[attr1=value1][attr2=value2]

    :param path: one path part (no dot)
    :return: dict with 'tag', optionally 'id', and one entry per [attr=value];
             empty dict when the part does not match the grammar
    """
    attrs = {}
    match = compiled_pattern.match(path)
    if match:
        attrs['tag'] = match.group('tag')
        if match.group('id'):
            attrs['id'] = match.group('id')

        attributes = match.group("attributes")
        for name, value in compiled_attr_pattern.findall(attributes):
            attrs[name] = value

    return attrs


def match_attrs(element_attrs, criteria_attrs):
    """
    :return: True when every (key, value) of criteria_attrs is present, with an equal
             value, in element_attrs. Empty or None criteria always match.
    """
    if not criteria_attrs:
        return True
    return all(item in element_attrs.items() for item in criteria_attrs.items())


def contains_attrs(element_attrs, criteria_attrs):
    """
    :return: True when every key of criteria_attrs is in element_attrs and its value is
             contained (``in``) in the element's value. Empty or None criteria always match.
    """
    if not criteria_attrs:
        return True
    return all(k in element_attrs and v in element_attrs[k] for k, v in criteria_attrs.items())


def search_elements_by_name(ft, tag: str = None, attrs: dict = None, comparison_method: str = "exact"):
    """
    Select all elements that either match the tag and / or the attribute

    :param ft: element, or list of elements, to browse (depth first)
    :param tag: tag to match; the special value "NotStr" (case insensitive) selects NotStr instances
    :param attrs: attributes to match
    :param comparison_method: 'exact' or 'contains'
    :return: list of matching elements. When ``ft`` is a list and nothing matches,
             None is returned instead of an empty list (kept as is for existing callers).
    """
    compare_attrs = contains_attrs if comparison_method == "contains" else match_attrs

    def _search_elements_by_name(_ft):
        result = []
        if isinstance(_ft, NotStr) and tag is not None and tag.lower() == "notstr":
            result.append(_ft)

        elif hasattr(_ft, "tag"):
            # Base case: check if the current element matches the criteria
            if (tag is None or _ft.tag == tag) and compare_attrs(_ft.attrs, attrs):
                result.append(_ft)

            # Recursive case: search through the children
            for child in _ft.children:
                result.extend(_search_elements_by_name(child))

        elif isinstance(_ft, (list, tuple)):
            for _item in _ft:
                result.extend(_search_elements_by_name(_item))

        return result

    if isinstance(ft, list):
        res = []
        for item in ft:
            res.extend(_search_elements_by_name(item))
        return res if res else None

    return _search_elements_by_name(ft)


def search_elements_by_path(ft, path: str, attrs: dict = None):
    """
    Selects elements that match a given path. The path is a dot-separated list of elements.

    Once the path is found, the optional attributes are compared against the last element's attributes.
    Note the path may not start at the root node of the tree structure.

    :param ft: The root node of the tree structure to search within.
    :param path: Dot-separated string representing the path to match within the tree structure.
    :param attrs: Optional dictionary of attributes to match against the tree nodes. If not provided, no
                  attribute filtering is applied.
    :return: A list of nodes matching the given path and attributes.
    """
    parts = path.split(".")
    tail = parts.pop()
    head = ".".join(parts)

    def _find(current, previous_path):
        result = []
        # previous_path holds the tags of the ancestors; it only has to END with head
        if (current.tag == tail
                and previous_path.endswith(head)
                and match_attrs(current.attrs, attrs)):
            result.append(current)

        for child in current.children:
            if hasattr(child, "tag"):
                next_path = previous_path + "." + current.tag if previous_path else current.tag
                result.extend(_find(child, next_path))

        return result

    return _find(ft, "")


def search_first_with_attribute(ft, tag, attribute):
    """
    Browse ft and its children to find the first element that matches the tag and has the attribute defined
    We do not care about the value of the attribute, just the presence of it.
    if tag is None, it will return the first element with the attribute

    :param ft: element to browse; anything without a "tag" attribute yields None
    :param tag: tag to match, or None for any tag
    :param attribute: name of the attribute that must be present
    :return: the first matching element (depth first), or None
    :raises ValueError: when attribute is None
    """
    if attribute is None:
        raise ValueError("Attribute must be provided to find an element.")

    if not hasattr(ft, "tag"):
        return None

    # Check the current element
    if (tag is None or ft.tag == tag) and attribute in ft.attrs:
        return ft

    # Traverse children if the current element doesn't match
    for child in ft.children:
        result = search_first_with_attribute(child, tag, attribute)
        if result:
            return result

    return None


def find_first_match(ft, path: str):
    """
    Use backtracking to find the first element that matches the full path
    you can use #id and [attr=value] in the path
    example : div#id[attr=value].div.span#id_2[class=class_2]
    will return the span#id_2 element if it exists

    The first path part is compared with ``ft`` itself (or with the items of ``ft``
    when it is a list or a tuple), each following part with the direct children
    of the previous match.

    :param ft: element, or list / tuple of elements
    :param path: dot-separated path parts
    :return: the first element matching the whole path, or None
    """

    def _matches(element, path_part):
        """Check if an element matches a specific path part."""
        if not hasattr(element, "attrs"):
            return False

        attrs_to_match = get_path_attributes(path_part)
        # the tag is compared like any other attribute
        element_attrs = element.attrs.copy() | {"tag": element.tag}
        return all(element_attrs.get(attr) == value for attr, value in attrs_to_match.items())

    def _search(elements, path_parts):
        """Recursively search for the matching element."""
        if not path_parts:
            return None

        for element in elements:
            if _matches(element, path_parts[0]):
                if len(path_parts) == 1:
                    return element

                res = _search(element.children, path_parts[1:])
                if res is not None:
                    return res

        return None

    elements_as_list = ft if isinstance(ft, (list, tuple)) else [ft]
    return _search(elements_as_list, path.split("."))


def matches(actual, expected, path=""):
    """
    Assert that ``actual`` fulfils the pattern described by ``expected``.

    Only what ``expected`` declares is checked: its attributes (a value may be a
    StartsWith or Contains instance) and its children, compared by position. ``actual``
    may have extra attributes and extra trailing children. A DoNotCheck expected value
    always matches, ``Empty`` requires no attributes and no children, and an expected
    NotStr must be a prefix of the actual one once leading whitespace is stripped.

    :param actual: the value or element under test
    :param expected: the pattern
    :param path: location prefix used in the failure messages
    :return: True when everything matches
    :raises AssertionError: on the first difference
    """

    def _type(x):
        # numpy scalars are compared as their python counterpart
        if isinstance(x, numpy.int64):
            return int
        elif isinstance(x, numpy.float64):
            return float
        return type(x)

    def _debug(_actual, _expected):
        str_actual = _debug_print_actual(_actual, _expected, "", 3)
        str_expected = _debug_print_expected(_expected, "", 2)
        return f"\nactual={str_actual}\nexpected={str_expected}"

    def _debug_value(x):
        if x in ("** NOT FOUND **", "** NONE **", "** NO MORE CHILDREN **"):
            return x
        elif isinstance(x, str):
            return f"'{x}'" if "'" not in x else f'"{x}"'
        else:
            return x

    def _debug_print_actual(_actual, _expected, indent, max_level):
        # debug print both actual and expected, showing only expected elements
        if max_level == 0:
            return ""

        if _actual is None:
            return f"{indent}** NONE **"

        if not hasattr(_actual, "tag") or not hasattr(_expected, "tag"):
            return f"{indent}{_actual}"

        str_actual = f"{indent}({_actual.tag}"
        first_attr = True
        for attr in _expected.attrs:
            comma = " " if first_attr else ", "
            str_actual += f"{comma}{attr}={_debug_value(_actual.attrs.get(attr, '** NOT FOUND **'))}"
            first_attr = False

        if len(_expected.children) == 0 and len(_actual.children) and max_level > 1:
            # force recursion to see sub levels
            for _actual_child in _actual.children:
                str_child_a = _debug_print_actual(_actual_child, _actual_child, indent + " ", max_level - 1)
                str_actual += "\n" + str_child_a if str_child_a else ""

        else:
            for index, _expected_child in enumerate(_expected.children):
                if len(_actual.children) > index:
                    _actual_child = _actual.children[index]
                else:
                    _actual_child = "** NO MORE CHILDREN **"

                str_child_a = _debug_print_actual(_actual_child, _expected_child, indent + " ", max_level - 1)
                str_actual += "\n" + str_child_a if str_child_a else ""

        str_actual += ")"
        return str_actual

    def _debug_print_expected(_expected, indent, max_level):
        if max_level == 0:
            return ""

        if _expected is None:
            return f"{indent}** NONE **"

        if not hasattr(_expected, "tag"):
            return f"{indent}{_expected}"

        str_expected = f"{indent}({_expected.tag}"
        first_attr = True
        for attr in _expected.attrs:
            comma = " " if first_attr else ", "
            str_expected += f"{comma}{attr}={_expected.attrs[attr]}"
            first_attr = False

        for _expected_child in _expected.children:
            str_child_e = _debug_print_expected(_expected_child, indent + " ", max_level - 1)
            str_expected += "\n" + str_child_e if str_child_e else ""

        str_expected += ")"
        return str_expected

    if actual is None and expected is not None:
        # explicit raise: unlike "assert False" it is not stripped by python -O
        raise AssertionError(f"{print_path(path)}actual is None !")

    if isinstance(expected, DoNotCheck):
        return True

    if expected is Empty:
        assert actual.attrs == {}, f"Empty element expected, but found attributes {actual.attrs}."
        assert len(actual.children) == 0, f"Empty element expected, but found children {actual.children}."
        return True

    assert _type(actual) == _type(expected) or (hasattr(actual, "tag") and hasattr(expected, "tag")), \
        f"{print_path(path)}The types are different: {type(actual)} != {type(expected)}{_debug(actual, expected)}."

    if isinstance(expected, (list, tuple)):
        assert len(actual) >= len(expected), \
            f"{print_path(path)}Some required elements are missing: {len(actual)=} < {len(expected)}, \n{_debug(actual, expected)}."

        for actual_child, expected_child in zip(actual, expected):
            # path is forwarded so that a nested failure still reports where it happened
            matches(actual_child, expected_child, path)

    elif isinstance(expected, NotStr):
        # computed outside of the f-string: a backslash inside an f-string expression
        # is a syntax error before Python 3.12
        without_leading_newlines = actual.s.lstrip('\n')
        to_compare = without_leading_newlines.lstrip()
        assert to_compare.startswith(expected.s), \
            f"{print_path(path)}NotStr are different: '{without_leading_newlines}' != '{expected.s}'."

    elif hasattr(actual, "tag"):
        assert actual.tag == expected.tag, \
            f"{print_path(path)}The elements are different: '{actual.tag}' != '{expected.tag}'."

        # tag are the same, I can update it and be up to date when attr comparison fails
        path = path + "." + actual.tag if path else actual.tag
        if "id" in actual.attrs:
            path += f"#{actual.attrs['id']}"
        elif "name" in actual.attrs:
            path += f"[name={actual.attrs['name']}]"
        elif "class" in actual.attrs:
            path += f"[class={actual.attrs['class']}]"

        # only test the attributes referenced by the expected
        for expected_attr in expected.attrs:
            assert expected_attr in actual.attrs, \
                f"{print_path(path)}Attribute '{expected_attr}' is not found (with expected value: '{expected.attrs[expected_attr]}'). actual='{actual.attrs}'."

            if isinstance(expected.attrs[expected_attr], StartsWith):
                assert actual.attrs[expected_attr].startswith(expected.attrs[expected_attr].s), \
                    f"{print_path(path)}Attribute '{expected_attr}' does not start with '{expected.attrs[expected_attr].s}': actual='{actual.attrs[expected_attr]}', expected ='{expected.attrs[expected_attr].s}'."

            elif isinstance(expected.attrs[expected_attr], Contains):
                assert expected.attrs[expected_attr].s in actual.attrs[expected_attr], \
                    f"{print_path(path)}Attribute '{expected_attr}' does not contain '{expected.attrs[expected_attr].s}': actual='{actual.attrs[expected_attr]}', expected ='{expected.attrs[expected_attr].s}'."

            else:
                assert actual.attrs[expected_attr] == expected.attrs[expected_attr], \
                    f"{print_path(path)}The values are different for '{expected_attr}' : '{actual.attrs[expected_attr]}' != '{expected.attrs[expected_attr]}'."

        if len(expected.children) > 0 and expected.children[0] is Empty:
            matches(actual, expected.children[0], path)
        else:
            # hack to manage ft and Html object different behaviour
            if len(actual.children) == 0 and len(expected.children) == 1 and expected.children[0] == NotStr(""):
                pass
            else:
                assert len(actual.children) >= len(expected.children), \
                    f"{print_path(path)}Some required elements are missing: len(actual)={len(actual.children)} < len(expected)={len(expected.children)}{_debug(actual, expected)}."

                for actual_child, expected_child in zip(actual.children, expected.children):
                    matches(actual_child, expected_child, path)

    else:
        assert actual == expected, \
            f"{print_path(path)}The values are not the same: '{actual}' != '{expected}'."

    return True


def contains(lst, element, recursive=False):
    """
    Check if any item in the list matches the given element pattern
    using the existing matches() function.

    Args:
        lst: List of elements to search through
        element: Element pattern to match against
        recursive: If True, also search in children of each element

    Returns:
        bool: True if a match is found, False otherwise
    """
    if not lst:
        return False

    for item in lst:
        try:
            if matches(item, element):
                return True
        except AssertionError:
            # a failed match is the expected way to say "not this one"
            pass

        # If recursive is True, check children too
        if recursive and hasattr(item, "children") and item.children:
            if contains(item.children, element, recursive=True):
                return True

    return False


def get_selected(return_elements):
    """
    Decode the selection described by the first element whose id starts with 'tsm_'.

    :param return_elements: list of elements
    :return: list of tuples, one per child of that element:
             (selection_type, int, int) when selection-type starts with "cell"
             (the two last '-' separated parts of element-id),
             ("row", int) for a row (last part of element-id),
             ("column", element_id) for a column
    :raises AssertionError: when return_elements is not a list or no 'tsm_' element is found
    :raises NotImplementedError: for any other selection-type
    """
    assert isinstance(return_elements, list), "result must be a list"
    for element in return_elements:
        if hasattr(element, "id") and element.id.startswith("tsm_"):
            break
    else:
        raise AssertionError("No element with id 'tsm_' found in the return elements")

    res = []
    for child in element.children:
        selection_type = child.attrs["selection-type"]
        if selection_type.startswith("cell"):
            split = child.attrs["element-id"].split("-")
            selected = (selection_type, int(split[-2]), int(split[-1]))
        elif selection_type == "row":
            split = child.attrs["element-id"].split("-")
            selected = ("row", int(split[-1]))
        elif selection_type == "column":
            element_id = child.attrs["element-id"]
            selected = ("column", element_id)
        else:
            # NotImplemented is a constant, not an exception: calling it raised a TypeError
            raise NotImplementedError(f"Unsupported selection-type '{selection_type}'")

        res.append(selected)

    return res


def get_context_menu(return_elements):
    """
    Collect the context menu entries of every element whose id starts with 'cmcm_' or 'cmrm_'.

    :param return_elements: list of elements
    :return: one dict per child carrying "hx-post": the "hx-post" url with its first path
             segment dropped, "data_tooltip", plus the decoded "hx-vals" json entries
             (except "g_id") when present
    :raises AssertionError: when return_elements is not a list or no such element is found
    """
    assert isinstance(return_elements, list), "result must be a list"
    found = False
    res = []
    for element in return_elements:
        if hasattr(element, "id") and element.id[:5] in ("cmcm_", "cmrm_"):
            found = True
            for child in element.children:
                if "hx-post" in child.attrs:
                    context_menu = {
                        "hx-post": "/" + "/".join(child.attrs["hx-post"].split("/")[2:]),
                        "data_tooltip": child.attrs["data-tooltip"],
                    }
                    if "hx-vals" in child.attrs:
                        args = json.loads(child.attrs["hx-vals"])
                        args_to_use = {key: value for key, value in args.items() if key != "g_id"}
                        context_menu.update(args_to_use)

                    res.append(context_menu)

    if not found:
        raise AssertionError("No element with id 'cmcm_' found in the return elements")

    return res


def debug_print(ft, attr1st=False):
    """
    :return: the result of html2ft() applied to the xml rendering of ft
    """
    return html2ft(to_xml(ft), attr1st=attr1st)


def extract_table_values(element, header=True):
    """
    Given element with tags and attributes
    Try to find the table values

    :param element:
    :param header: search for header and add it to the result
    :return: with header, an OrderedDict {column title: [cell values]};
             without, a list of rows, each row being the list of its cell values
    """
    # first, get the header
    if header:
        header_element = search_elements_by_name(element, attrs={"class": "dt-row dt-header"})[0]
        header_map = {}  # data-col -> column title
        res = OrderedDict()
        for row in header_element.children:
            col_index = row.attrs["data-col"]
            name_element = search_elements_by_name(row, attrs={"name": "dt-header-title"})[0]
            name = _get_element_value(name_element)
            header_map[col_index] = name
            res[name] = []

        body = search_elements_by_name(element, attrs={"class": "dt-body"})[0]
        for row in body.children:
            for col in row.children:
                col_index = col.attrs["data-col"]
                cell_element = search_elements_by_name(col, attrs={"name": "dt-cell-content"})[0]
                res[header_map[col_index]].append(_get_element_value(cell_element))

        return res

    else:
        body = search_elements_by_name(element, attrs={"class": "dt-body"})[0]
        res = []
        for row in body.children:
            row_values = []
            for col in row.children:
                # columns without cell content are skipped
                column = search_elements_by_name(col, attrs={"name": "dt-cell-content"})
                if len(column) > 0:
                    row_values.append(_get_element_value(column[0]))
            res.append(row_values)
        return res


def extract_table_values_new(ft, header=True):
    """
    Same purpose as extract_table_values(), for the 'dt2-' markup.

    A cell value is the "data-tooltip" of the first element carrying one; failing that,
    True / False when the cell holds a NotStr svg named "checked" / "unchecked"; else None.

    :param ft:
    :param header: search for header and add it to the result
    :return: with header, an OrderedDict {column title: [cell values]};
             without, a list of rows, each row being the list of its cell values
    """

    def _get_cell_content_value(cell_element):
        # search_first_with_attribute() only accepts a single element, so a list
        # (what search_elements_by_name() returns) is browsed item by item
        candidates = cell_element if isinstance(cell_element, (list, tuple)) else [cell_element]

        # try using data-tooltip
        for candidate in candidates:
            tooltip_element = search_first_with_attribute(candidate, None, "data-tooltip")
            if tooltip_element is not None:
                return tooltip_element.attrs["data-tooltip"]

        # for checkboxes, use the name of the NotStr element
        svg_element = search_elements_by_name(cell_element, "NotStr")
        if svg_element:
            match = compiled_svg_pattern.search(svg_element[0].s)
            if match:
                svg_name = match.group(1)
                return True if svg_name == "checked" else False if svg_name == "unchecked" else None

        return None

    # first, get the header
    if header:
        header_element = search_elements_by_name(ft, attrs={"class": "dt2-header"}, comparison_method='contains')[0]
        header_map = {}  # data-col -> column title
        res = OrderedDict()
        for row in header_element.children:
            col_id = row.attrs["data-col"]
            title = row.attrs["data-tooltip"]
            header_map[col_id] = title
            res[title] = []

        body = search_elements_by_name(ft, attrs={"class": "dt2-body"}, comparison_method='contains')[0]
        for row in body.children:
            for col in row.children:
                col_id = col.attrs["data-col"]
                cell_value = _get_cell_content_value(col)
                res[header_map[col_id]].append(cell_value)

        return res

    else:
        # 'contains', like the header branch, so both branches locate the same body
        body = search_elements_by_name(ft, attrs={"class": "dt2-body"}, comparison_method='contains')[0]
        res = []
        for row in body.children:
            row_values = []
            for col in row.children:
                columns = search_elements_by_name(col,
                                                  attrs={"class": "dt2-cell-content"},
                                                  comparison_method="contains")
                row_values.append(_get_cell_content_value(columns))
            res.append(row_values)
        return res


def extract_footer_values(element):
    """
    :param element:
    :return: the cell values of the 'dt-table-footer' element, as a list of rows
    """
    body = search_elements_by_name(element, attrs={"class": "dt-table-footer"})[0]
    res = []
    for row in body.children:
        row_values = []
        for col in row.children:
            cell_element = search_elements_by_name(col, attrs={"name": "dt-cell-content"})[0]
            row_values.append(_get_element_value(cell_element))
        res.append(row_values)

    return res


def extract_popup_content(element, filter_input=True) -> OrderedDict:
    """
    Extract the checkboxes and their values from the popup content

    :param element:
    :param filter_input: add the value of the filter input if requested.
    :return: OrderedDict {checkbox value: True when it has a 'checked' attribute},
             preceded by the "__filter_input__" entry when requested
    """
    res = OrderedDict()
    if filter_input:
        filter_value_element = search_elements_by_name(element, attrs={"name": "dt-popup-filter-input"})[0]
        res["__filter_input__"] = _get_element_value(filter_value_element) or ''

    checkboxes_div = search_elements_by_name(element, attrs={"class": 'dt-filter-popup-content'})[0]
    checkboxes_elements = search_elements_by_name(checkboxes_div, attrs={"type": "checkbox"})
    for checkbox in checkboxes_elements:
        res[checkbox.attrs['value']] = 'checked' in checkbox.attrs

    return res


def extract_jsonviewer_node(element):
    """
    Decode one json viewer node.

    :param element:
    :return: a JsonViewerNode, or None when element does not have exactly 3 children
    """
    # This structure of the Jsonview Node is
    # 3 children
    # 1st : Span(NotStr(name="expanded|collapse")) or None
    # 2nd : Span("key : ") or None (None is possible only for the root node)
    # 3rd : Span(value)
    if not hasattr(element, "children") or len(element.children) != 3:
        return None

    debug_folding = element.children[0]
    debug_key = element.children[1]
    value = element.children[2]

    # NOTE(review): span_icon is not defined in the visible part of this file;
    # presumably it is declared further down - confirm.
    if contains([debug_folding], span_icon("expanded")):
        is_expanded = True
    elif contains([debug_folding], span_icon("collapsed")):
        is_expanded = False
    else:
        is_expanded = None

    if debug_key is not None:
        assert hasattr(debug_key, "tag") and debug_key.tag == "span", "debug_key must be a span"
        key = debug_key.children[0].split(" : ")[0]
    else:
        key = None

    return JsonViewerNode(
        is_expanded,
        key,
        value,
        debug_key,
        debug_folding
    )


def to_array(dataframe: pd.DataFrame) -> list:
    """
    :return: the dataframe content as a list of rows, each row being a list of values
    """
    return [list(row) for _, row in dataframe.iterrows()]


def _get_element_value(element):
    """
    :return: the first child of the element when it has children, else its text
    """
    return element.children[0] if len(element.children) > 0 else element.text


def icon(name: str):
    """
    Test if an element is an icon

    :param name:
    :return:
    """
    # NOTE(review): the visible source is cut right after "NotStr(f'". The literal below
    # is reconstructed from svg_pattern (svg name="...") and from the prefix comparison
    # that matches() applies to NotStr - confirm against the original file.
    return NotStr(f'<svg name="{name}"')