853 lines
27 KiB
Python
853 lines
27 KiB
Python
import dataclasses
|
|
import json
|
|
import uuid
|
|
from dataclasses import dataclass
|
|
from typing import Self
|
|
|
|
from bs4 import BeautifulSoup, Tag
|
|
from fastcore.xml import FT, to_xml
|
|
from fasthtml.common import FastHTML
|
|
from starlette.responses import Response
|
|
from starlette.testclient import TestClient
|
|
|
|
from myfasthtml.core.commands import mount_commands
|
|
|
|
|
|
@dataclass
|
|
class MyFT:
|
|
tag: str
|
|
attrs: dict
|
|
children: list['MyFT'] = dataclasses.field(default_factory=list)
|
|
text: str | None = None
|
|
|
|
|
|
class TestableElement:
|
|
"""
|
|
Represents an HTML element that can be interacted with in tests.
|
|
|
|
This class will be used for future interactions like clicking elements
|
|
or verifying element properties.
|
|
"""
|
|
|
|
def __init__(self, client, source):
    """
    Initialize a testable element.

    Args:
        client: The MyTestClient instance used to send requests.
        source: The element to wrap. One of:
            - an HTML string (its first tag is used),
            - a BeautifulSoup ``Tag``,
            - a FastHTML ``FT`` object.

    Raises:
        ValueError: If ``source`` has an unsupported type, or is a string
            in which no HTML tag can be found.
    """
    self.client = client
    if isinstance(source, str):
        self.html_fragment = source
        tag = BeautifulSoup(source, 'html.parser').find()
        if tag is None:
            # find() yields None for empty / text-only input; report it the
            # same way as any other unusable source instead of crashing
            # with AttributeError on tag.name.
            raise ValueError(f"Invalid source '{source}' for TestableElement.")
        self.ft = MyFT(tag.name, tag.attrs)
    elif isinstance(source, Tag):
        self.html_fragment = str(source)
        self.ft = MyFT(source.name, source.attrs)
    elif isinstance(source, FT):
        self.ft = source
        self.html_fragment = to_xml(source).strip()
    else:
        raise ValueError(f"Invalid source '{source}' for TestableElement.")
|
|
|
|
def click(self):
    """
    Simulate a click by sending the element's HTMX request.

    Returns:
        The value returned by ``_send_htmx_request()``.
    """
    outcome = self._send_htmx_request()
    return outcome
|
|
|
|
def matches(self, ft):
    """
    Placeholder for comparing this element with a FastHTML element.

    Not implemented yet: the argument is ignored and None is returned.
    """
    return None
|
|
|
|
def _send_htmx_request(self, json_data: dict | None = None, data: dict | None = None) -> Response:
|
|
"""
|
|
Simulates an HTMX request in Python for unit testing.
|
|
|
|
This function reads the 'hx-*' attributes from the FastHTML object
|
|
to determine the HTTP method, URL, headers, and body of the request,
|
|
then executes it via the TestClient.
|
|
|
|
Args:
|
|
data: (Optional) A dict for form data
|
|
(sends as 'application/x-www-form-urlencoded').
|
|
json_data: (Optional) A dict for JSON data
|
|
(sends as 'application/json').
|
|
Takes precedence over 'hx_vals'.
|
|
|
|
Returns:
|
|
The Response object from the simulated request.
|
|
"""
|
|
|
|
# The essential header for FastHTML (and HTMX) to identify the request
|
|
headers = {"HX-Request": "true"}
|
|
method = "GET" # HTMX defaults to GET if not specified
|
|
url = None
|
|
|
|
verbs = {
|
|
'hx_get': 'GET',
|
|
'hx_post': 'POST',
|
|
'hx_put': 'PUT',
|
|
'hx_delete': 'DELETE',
|
|
'hx_patch': 'PATCH',
|
|
}
|
|
|
|
# .props contains the kwargs passed to the object (e.g., hx_post="/url")
|
|
element_attrs = self.ft.attrs or {}
|
|
|
|
# Build the attributes
|
|
for key, value in element_attrs.items():
|
|
|
|
# sanitize the key
|
|
key = key.lower().strip()
|
|
if key.startswith('hx-'):
|
|
key = 'hx_' + key[3:]
|
|
|
|
if key in verbs:
|
|
# Verb attribute: defines the method and URL
|
|
method = verbs[key]
|
|
url = str(value)
|
|
|
|
elif key == 'hx_vals':
|
|
# hx_vals defines the JSON body, if not already provided by the test
|
|
if json_data is None:
|
|
if isinstance(value, str):
|
|
json_data = json.loads(value)
|
|
elif isinstance(value, dict):
|
|
json_data = value
|
|
|
|
elif key.startswith('hx_'):
|
|
# Any other hx_* attribute is converted to an HTTP header
|
|
# e.g.: 'hx_target' -> 'HX-Target'
|
|
header_name = '-'.join(p.capitalize() for p in key.split('_'))
|
|
headers[header_name] = str(value)
|
|
|
|
# Sanity check
|
|
if url is None:
|
|
raise ValueError(
|
|
f"The <{self.ft.tag}> element has no HTMX verb attribute "
|
|
"(e.g., hx_get, hx_post) to define a URL."
|
|
)
|
|
|
|
# Send the request
|
|
return self.client.send_request(method, url, headers=headers, data=data, json_data=json_data)
|
|
|
|
def _support_htmx(self):
|
|
"""Check if the element supports HTMX."""
|
|
return ('hx_get' in self.ft.attrs or
|
|
'hx-get' in self.ft.attrs or
|
|
'hx_post' in self.ft.attrs or
|
|
'hx-post' in self.ft.attrs)
|
|
|
|
|
|
class TestableForm(TestableElement):
|
|
"""
|
|
Represents an HTML form that can be filled and submitted in tests.
|
|
"""
|
|
|
|
def __init__(self, client, source):
    """
    Initialize a testable form.

    Args:
        client: The MyTestClient instance.
        source: Any source accepted by TestableElement (HTML string,
            BeautifulSoup ``Tag`` or ``FT``) that contains a ``<form>``.

    Raises:
        ValueError: If the source is invalid or contains no ``<form>``.
    """
    super().__init__(client, source)
    self.form = BeautifulSoup(self.html_fragment, 'html.parser').find('form')
    if self.form is None:
        # Fail here with a clear message rather than later with an
        # AttributeError on None when the fields are scanned.
        raise ValueError("No <form> element found in the source for TestableForm.")

    self.fields_mapping = {}  # link between the input label and the input name
    self.fields = {}  # field name; field value
    self.select_fields = {}  # list of possible options for 'select' input fields

    self._update_fields_mapping()
    self.update_fields()
|
|
|
|
def update_fields(self):
    """
    Rebuild ``self.fields`` and ``self.select_fields`` from the form markup.

    Inputs (only those with a ``name``):
        - checkbox: stored as a bool, True when the ``checked`` attribute exists
        - radio: the ``value`` of a checked radio; unchecked radios add nothing
        - number: the value passed through ``_convert_number``
        - anything else (text, hidden, email, ...): ``_convert_value``

    Selects (only those with a ``name``):
        - ``self.select_fields[name]`` lists every option as
          ``{'value': ..., 'text': ...}`` (the text doubles as the value when
          the option has no ``value`` attribute)
        - ``self.fields[name]`` is the value of the last option flagged
          ``selected``, else the first option's value, else left unset
    """
    fields = self.fields = {}
    select_fields = self.select_fields = {}

    # --- <input> elements -------------------------------------------------
    for node in self.form.find_all('input'):
        field_name = node.get('name')
        if not field_name:
            continue

        kind = node.get('type', 'text').lower()
        raw = node.get('value', '')

        if kind == 'checkbox':
            fields[field_name] = node.has_attr('checked')
        elif kind == 'radio':
            # Only a checked radio contributes a value for its group.
            if node.has_attr('checked'):
                fields[field_name] = raw
        elif kind == 'number':
            fields[field_name] = self._convert_number(raw)
        else:
            fields[field_name] = self._convert_value(raw)

    # --- <select> elements ------------------------------------------------
    for node in self.form.find_all('select'):
        field_name = node.get('name')
        if not field_name:
            continue

        entries = []
        chosen = None
        for opt in node.find_all('option'):
            caption = opt.get_text(strip=True)
            opt_value = opt.get('value', caption)
            entries.append({
                'value': opt_value,
                'text': caption
            })
            if opt.has_attr('selected'):
                chosen = opt_value

        select_fields[field_name] = entries

        if chosen is not None:
            fields[field_name] = chosen
        elif entries:
            fields[field_name] = entries[0]['value']
|
|
|
|
def fill(self, **kwargs):
    """
    Set form field values.

    Each keyword is resolved through ``translate()`` (so a label text or a
    field name may be used) and the value is stored in ``self.fields``.

    Args:
        **kwargs: Field names (or label texts) and the values to assign.

    Raises:
        ValueError: If a keyword does not resolve to a known field.
    """
    for label, new_value in kwargs.items():
        target = self.translate(label)
        if target not in self.fields:
            raise ValueError(f"Invalid field name '{label}'.")
        self.fields[target] = new_value
|
|
|
|
def submit(self):
    """
    Submit the form with the current content of ``self.fields``.

    Two paths exist:
    - HTMX: when ``_support_htmx()`` is true, the fields are sent as form
      data through ``_send_htmx_request``.
    - Classic: otherwise the form's ``action`` is the URL and its
      ``method`` attribute (``post`` when absent) is the HTTP method; the
      fields are sent with a urlencoded Content-Type header.

    Returns:
        The response from the form submission.

    Raises:
        ValueError: If a classic form has no (or a blank) ``action``.
    """
    if self._support_htmx():
        return self._send_htmx_request(data=self.fields)

    target_url = self.form.get('action')
    has_target = bool(target_url) and target_url.strip() != ''
    if not has_target:
        raise ValueError(
            "The form has no 'action' attribute. "
            "Cannot submit a classic form without a target URL."
        )

    http_method = self.form.get('method', 'post').upper()

    return self.client.send_request(
        method=http_method,
        url=target_url,
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        data=self.fields,
    )
|
|
|
|
def translate(self, field):
    """
    Resolve a label text to its field name.

    Args:
        field: A key of ``self.fields_mapping`` (label text) or a field name.

    Returns:
        The mapped name, or ``field`` unchanged when there is no mapping.
    """
    lookup = self.fields_mapping
    return lookup.get(field, field)
|
|
|
|
def _update_fields_mapping(self):
    """
    Rebuild ``self.fields_mapping`` as ``{label_text: input_identifier}``.

    Labels are matched with ``<input>`` elements in this order:
        1. Explicit: the label's ``for`` equals the ``id`` of an input in the form
        2. Implicit: the label contains an input
        3. Same parent, matching ``for``/``id``, input not yet claimed
        4. Proximity: first not-yet-claimed input under the label's parent
        5. Inputs no label claimed are keyed by their own identifier

    The identifier comes from ``_get_input_identifier`` (name, else id, else
    a generated ``unnamed_<n>`` whose counter advances each time one is used).
    """
    mapping = self.fields_mapping = {}
    claimed = set()  # id() of every input already tied to a label
    unnamed_total = 0

    def identify(field):
        # Resolve the identifier and advance the counter for generated names.
        nonlocal unnamed_total
        ident = self._get_input_identifier(field, unnamed_total)
        if ident.startswith('unnamed_'):
            unnamed_total += 1
        return ident

    def attach(text, field):
        # Tie a label text to an input and mark that input as claimed.
        mapping[text] = identify(field)
        claimed.add(id(field))

    every_input = self.form.find_all('input')
    labels = self.form.find_all('label')

    # Priority 1 & 2: explicit (for/id) first, nested input as fallback
    for label in labels:
        text = label.get_text(strip=True)
        wanted_id = label.get('for')

        candidate = self.form.find('input', id=wanted_id) if wanted_id else None
        if not candidate:
            candidate = label.find('input')
        if candidate:
            attach(text, candidate)

    # Priority 3 & 4: look around the label's parent
    for label in labels:
        text = label.get_text(strip=True)
        if text in mapping:
            # A label with this text has already been mapped.
            continue

        container = label.parent
        if not container:
            continue

        unclaimed = [node for node in container.find_all('input') if id(node) not in claimed]
        wanted_id = label.get('for')

        chosen = None
        if wanted_id:
            # Priority 3: unclaimed input whose id matches the label's 'for'
            chosen = next((node for node in unclaimed if node.get('id') == wanted_id), None)
        if chosen is None and unclaimed:
            # Priority 4: first unclaimed input under the same parent
            chosen = unclaimed[0]
        if chosen is not None:
            attach(text, chosen)

    # Priority 5: inputs without any label map to themselves
    for node in every_input:
        if id(node) not in claimed:
            ident = identify(node)
            mapping[ident] = ident
|
|
|
|
@staticmethod
|
|
def _get_input_identifier(input_field, counter):
|
|
"""
|
|
Get the identifier for an input field.
|
|
|
|
Args:
|
|
input_field: The BeautifulSoup Tag object representing the input.
|
|
counter: Current counter for unnamed inputs.
|
|
|
|
Returns:
|
|
The input name, id, or a generated "unnamed_X" identifier.
|
|
"""
|
|
if input_field.get('name'):
|
|
return input_field['name']
|
|
elif input_field.get('id'):
|
|
return input_field['id']
|
|
else:
|
|
return f"unnamed_{counter}"
|
|
|
|
@staticmethod
|
|
def _convert_number(value):
|
|
"""
|
|
Convert a string value to int or float.
|
|
|
|
Args:
|
|
value: String value to convert.
|
|
|
|
Returns:
|
|
int, float, or empty string if conversion fails.
|
|
"""
|
|
if not value or value.strip() == '':
|
|
return ''
|
|
|
|
try:
|
|
# Try float first to detect decimal numbers
|
|
if '.' in value or 'e' in value.lower():
|
|
return float(value)
|
|
else:
|
|
return int(value)
|
|
except ValueError:
|
|
return value
|
|
|
|
@staticmethod
|
|
def _convert_value(value):
|
|
"""
|
|
Analyze and convert a value to its appropriate type.
|
|
|
|
Conversion priority:
|
|
1. Boolean keywords (true/false)
|
|
2. Float (contains decimal point)
|
|
3. Int (numeric)
|
|
4. Empty string
|
|
5. String (default)
|
|
|
|
Args:
|
|
value: String value to convert.
|
|
|
|
Returns:
|
|
Converted value with appropriate type (bool, float, int, or str).
|
|
"""
|
|
if not value or value.strip() == '':
|
|
return ''
|
|
|
|
value_lower = value.lower().strip()
|
|
|
|
# Check for boolean
|
|
if value_lower in ('true', 'false'):
|
|
return value_lower == 'true'
|
|
|
|
# Check for numeric values
|
|
try:
|
|
# Check for float (has decimal point or scientific notation)
|
|
if '.' in value or 'e' in value_lower:
|
|
return float(value)
|
|
# Try int
|
|
else:
|
|
return int(value)
|
|
except ValueError:
|
|
pass
|
|
|
|
# Default to string
|
|
return value
|
|
|
|
|
|
class MyTestClient:
|
|
"""
|
|
A test client helper for FastHTML applications that provides
|
|
a more user-friendly API for testing HTML responses.
|
|
|
|
This class wraps Starlette's TestClient and provides methods
|
|
to verify page content in a way similar to NiceGui's test fixtures.
|
|
"""
|
|
|
|
def __init__(self, app: FastHTML, parent_levels: int = 1):
    """
    Create a test client around a FastHTML application.

    Args:
        app: The FastHTML application to test.
        parent_levels: Number of parent levels to show in error messages (default: 1).
    """
    self.app = app
    self.parent_levels = parent_levels

    # Nothing is loaded until open() / send_request() / set_content() runs.
    self._content = None
    self._soup = None

    # Random identifier; send_request() adds it to JSON payloads.
    self._session = str(uuid.uuid4())

    self.client = TestClient(app)

    # make sure that the commands are mounted
    mount_commands(self.app)
|
|
|
|
def open(self, path: str) -> Self:
|
|
"""
|
|
Open a page and store its content for subsequent assertions.
|
|
|
|
Args:
|
|
path: The URL path to request (e.g., '/home', '/api/users').
|
|
|
|
Returns:
|
|
self: Returns the client instance for method chaining.
|
|
|
|
Raises:
|
|
AssertionError: If the response status code is not 200.
|
|
"""
|
|
|
|
res = self.client.get(path)
|
|
assert res.status_code == 200, (
|
|
f"Failed to open '{path}'. "
|
|
f"status code={res.status_code} : reason='{res.text}'"
|
|
)
|
|
|
|
self.set_content(res.text)
|
|
return self
|
|
|
|
def send_request(self, method: str, url: str, headers: dict = None, data=None, json_data=None):
|
|
if json_data is not None:
|
|
json_data['session'] = self._session
|
|
|
|
res = self.client.request(
|
|
method,
|
|
url,
|
|
headers=headers,
|
|
data=data, # For form data
|
|
json=json_data # For JSON bodies (e.g., from hx_vals)
|
|
)
|
|
|
|
assert res.status_code == 200, (
|
|
f"Failed to send request '{method=}', {url=}. "
|
|
f"status code={res.status_code} : reason='{res.text}'"
|
|
)
|
|
|
|
self.set_content(res.text)
|
|
return self
|
|
|
|
def should_see(self, text: str) -> Self:
|
|
"""
|
|
Assert that the given text is present in the visible page content.
|
|
|
|
This method parses the HTML and searches only in the visible text,
|
|
ignoring HTML tags and attributes.
|
|
|
|
Args:
|
|
text: The text string to search for (case-sensitive).
|
|
|
|
Returns:
|
|
self: Returns the client instance for method chaining.
|
|
|
|
Raises:
|
|
AssertionError: If the text is not found in the page content.
|
|
ValueError: If no page has been opened yet.
|
|
"""
|
|
|
|
def clean_text(txt):
|
|
return "\n".join(line for line in txt.splitlines() if line.strip())
|
|
|
|
|
|
if self._content is None:
|
|
raise ValueError(
|
|
"No page content available. Call open() before should_see()."
|
|
)
|
|
|
|
visible_text = self._soup.get_text()
|
|
|
|
if text not in visible_text:
|
|
# Provide a snippet of the actual content for debugging
|
|
snippet_length = 200
|
|
content_snippet = clean_text(
|
|
visible_text[:snippet_length] + "..."
|
|
if len(visible_text) > snippet_length
|
|
else visible_text
|
|
)
|
|
raise AssertionError(
|
|
f"Expected to see '{text}' in page content but it was not found.\n"
|
|
f"Visible content (first {snippet_length} chars): {content_snippet}"
|
|
)
|
|
|
|
return self
|
|
|
|
def should_not_see(self, text: str) -> Self:
|
|
"""
|
|
Assert that the given text is NOT present in the visible page content.
|
|
|
|
This method parses the HTML and searches only in the visible text,
|
|
ignoring HTML tags and attributes.
|
|
|
|
Args:
|
|
text: The text string that should not be present (case-sensitive).
|
|
|
|
Returns:
|
|
self: Returns the client instance for method chaining.
|
|
|
|
Raises:
|
|
AssertionError: If the text is found in the page content.
|
|
ValueError: If no page has been opened yet.
|
|
"""
|
|
if self._content is None:
|
|
raise ValueError(
|
|
"No page content available. Call open() before should_not_see()."
|
|
)
|
|
|
|
visible_text = self._soup.get_text()
|
|
|
|
if text in visible_text:
|
|
element = self._find_visible_text_element(self._soup, text)
|
|
|
|
if element:
|
|
context = self._format_element_with_context(element, self.parent_levels)
|
|
error_msg = (
|
|
f"Expected NOT to see '{text}' in page content but it was found.\n"
|
|
f"Found in:\n{context}"
|
|
)
|
|
else:
|
|
error_msg = (
|
|
f"Expected NOT to see '{text}' in page content but it was found.\n"
|
|
f"Unable to locate the element containing this text."
|
|
)
|
|
|
|
raise AssertionError(error_msg)
|
|
|
|
return self
|
|
|
|
def find_element(self, selector: str) -> TestableElement:
    """
    Locate exactly one element of the current page by CSS selector.

    Args:
        selector: A CSS selector string (e.g., '#my-id', '.my-class', 'button.primary').

    Returns:
        TestableElement: A testable element wrapping the matched element.

    Raises:
        ValueError: If no page has been opened yet.
        AssertionError: If no element or multiple elements match the selector.

    Examples:
        element = client.open('/').find_element('#login-button')
        element = client.find_element('button.primary')
    """
    if self._content is None:
        raise ValueError(
            "No page content available. Call open() before find_element()."
        )

    hits = self._soup.select(selector)
    count = len(hits)

    if count == 1:
        return TestableElement(self, hits[0])

    if count == 0:
        raise AssertionError(
            f"No element found matching selector '{selector}'."
        )

    raise AssertionError(
        f"Found {count} elements matching selector '{selector}'. Expected exactly 1."
    )
|
|
|
|
def find_form(self, fields: list = None) -> TestableForm:
    """
    Locate exactly one form in the current page.

    Args:
        fields: (Optional) Field names or label texts. When given, only
            forms in which every entry resolves (through ``translate``)
            to a known field are kept.

    Returns:
        TestableForm: The single remaining form.

    Raises:
        ValueError: If no page has been opened yet.
        AssertionError: If the page has no form, or if the number of
            remaining forms is not exactly 1.
    """
    if self._content is None:
        raise ValueError(
            "No page content available. Call open() before find_form()."
        )

    form_tags = self._soup.select("form")
    if not form_tags:
        raise AssertionError(
            "No form found."
        )

    candidates = [TestableForm(self, tag) for tag in form_tags]
    if fields is not None:
        candidates = [
            form for form in candidates
            if all(form.translate(name) in form.fields for name in fields)
        ]

    if len(candidates) != 1:
        raise AssertionError(
            f"Found {len(candidates)} forms (with the specified fields). Expected exactly 1."
        )
    return candidates[0]
|
|
|
|
def get_content(self) -> str:
|
|
"""
|
|
Get the raw HTML content of the last opened page.
|
|
|
|
Returns:
|
|
The HTML content as a string, or None if no page has been opened.
|
|
"""
|
|
return self._content
|
|
|
|
def set_content(self, content: str) -> Self:
    """
    Store HTML content and its parsed BeautifulSoup tree.

    Args:
        content: The HTML content string to set.

    Returns:
        self: Returns the client instance for method chaining.
    """
    self._content = content
    parsed = BeautifulSoup(content, 'html.parser')
    self._soup = parsed
    return self
|
|
|
|
@staticmethod
def _find_visible_text_element(soup, text: str):
    """
    Find the innermost element whose text contains ``text``.

    Descendants are visited in document order. The first Tag whose
    ``get_text()`` contains ``text`` while none of its direct child Tags
    does is returned.

    Args:
        soup: BeautifulSoup object representing the parsed HTML.
        text: The text to search for.

    Returns:
        The matching Tag, or None if not found.
    """
    for node in soup.descendants:
        # Only Tag nodes are candidates; plain strings are skipped.
        if not isinstance(node, Tag) or text not in node.get_text():
            continue

        # Prefer the smallest container: skip this node if a child Tag
        # already holds the whole text (that child is visited later).
        held_by_child = any(
            isinstance(kid, Tag) and text in kid.get_text()
            for kid in node.children
        )
        if not held_by_child:
            return node

    return None
|
|
|
|
@staticmethod
|
|
def _indent_html(html_str: str, indent: int = 2):
|
|
"""
|
|
Add indentation to HTML string.
|
|
|
|
Args:
|
|
html_str: HTML string to indent.
|
|
indent: Number of spaces for indentation.
|
|
|
|
Returns:
|
|
str: Indented HTML string.
|
|
"""
|
|
lines = html_str.split('\n')
|
|
indented_lines = [' ' * indent + line for line in lines if line.strip()]
|
|
return '\n'.join(indented_lines)
|
|
|
|
def _format_element_with_context(self, element, parent_levels: int):
|
|
"""
|
|
Format an element with its parent context for display.
|
|
|
|
Args:
|
|
element: BeautifulSoup element to format.
|
|
parent_levels: Number of parent levels to include.
|
|
|
|
Returns:
|
|
str: Formatted HTML string with indentation.
|
|
"""
|
|
# Collect the element and its parents
|
|
elements_to_show = [element]
|
|
current = element
|
|
|
|
for _ in range(parent_levels):
|
|
if current.parent and current.parent.name: # Skip NavigableString parents
|
|
elements_to_show.insert(0, current.parent)
|
|
current = current.parent
|
|
else:
|
|
break
|
|
|
|
# Format the top-level element with proper indentation
|
|
if len(elements_to_show) == 1:
|
|
return self._indent_html(str(element), indent=2)
|
|
|
|
# Build the nested structure
|
|
result = self._build_nested_context(elements_to_show, element)
|
|
return self._indent_html(result, indent=2)
|
|
|
|
def _build_nested_context(self, elements_chain, target_element):
|
|
"""
|
|
Build nested HTML context showing parents and target element.
|
|
|
|
Args:
|
|
elements_chain: List of elements from outermost parent to target.
|
|
target_element: The element that contains the searched text.
|
|
|
|
Returns:
|
|
str: Nested HTML structure.
|
|
"""
|
|
if len(elements_chain) == 1:
|
|
return str(target_element)
|
|
|
|
# Get the outermost element
|
|
outer = elements_chain[0]
|
|
|
|
# Start with opening tag
|
|
result = f"<{outer.name}"
|
|
if outer.attrs:
|
|
attrs = ' '.join(f'{k}="{v}"' if not isinstance(v, list) else f'{k}="{" ".join(v)}"'
|
|
for k, v in outer.attrs.items())
|
|
result += f" {attrs}"
|
|
result += ">\n"
|
|
|
|
# Add nested content
|
|
if len(elements_chain) == 2:
|
|
# This is the target element
|
|
result += self._indent_html(str(target_element), indent=2) + "\n"
|
|
else:
|
|
# Recursive call for deeper nesting
|
|
nested = self._build_nested_context(elements_chain[1:], target_element)
|
|
result += self._indent_html(nested, indent=2) + "\n"
|
|
|
|
# Closing tag
|
|
result += f"</{outer.name}>"
|
|
|
|
return result
|