Files
MyFastHtml/tests/core/test_matching_utils.py
2025-11-26 20:53:12 +01:00

105 lines
3.3 KiB
Python

from dataclasses import dataclass
from myfasthtml.core.matching_utils import fuzzy_matching, subsequence_matching
class TestFuzzyMatching:
def test_i_can_find_exact_match_with_fuzzy(self):
# Exact match should always pass
choices = ["hello"]
result = fuzzy_matching("hello", choices)
assert len(result) == 1
assert result[0] == "hello"
def test_i_can_find_close_match_with_fuzzy(self):
# "helo.txt" should match "hello.txt" with high similarity
choices = ["hello"]
result = fuzzy_matching("helo", choices, similarity_threshold=0.7)
assert len(result) == 1
assert result[0] == "hello"
def test_i_cannot_find_dissimilar_match_with_fuzzy(self):
# "world.txt" should not match "hello.txt"
choices = ["hello"]
result = fuzzy_matching("world", choices, similarity_threshold=0.7)
assert len(result) == 0
def test_i_can_sort_by_similarity_in_fuzzy(self):
# hello has a higher similarity than helo
choices = [
"hello",
"helo",
]
result = fuzzy_matching("hello", choices, similarity_threshold=0.7)
assert result == ["hello", "helo"]
def test_i_can_find_on_object(self):
@dataclass
class DummyObject:
value: str
id: str
choices = [
DummyObject("helo", "1"),
DummyObject("hello", "2"),
DummyObject("xyz", "3"),
]
result = fuzzy_matching("hello", choices, get_attr=lambda x: x.value)
assert len(result) == 2
assert result == [DummyObject("hello", "2"), DummyObject("helo", "1")]
class TestSubsequenceMatching:
def test_i_can_match_subsequence_simple(self):
# "abg" should match "AlphaBetaGamma"
choices = ["AlphaBetaGamma"]
result = subsequence_matching("abg", choices)
assert len(result) == 1
assert result[0] == "AlphaBetaGamma"
def test_i_can_match_subsequence_simple_case_insensitive(self):
# "abg" should match "alphabetagamma"
choices = ["alphabetagamma"]
result = subsequence_matching("abg", choices)
assert len(result) == 1
assert result[0] == "alphabetagamma"
def test_i_cannot_match_wrong_order_subsequence(self):
# the order is wrong
choices = ["AlphaBetaGamma"]
result = subsequence_matching("gba", choices)
assert len(result) == 0
def test_i_can_match_multiple_documents_subsequence(self):
# "abg" should match both filenames, but "AlphaBetaGamma" has a higher score
choices = [
"AlphaBetaGamma",
"HalleBerryIsGone",
]
result = subsequence_matching("abg", choices)
assert len(result) == 2
assert result[0] == "AlphaBetaGamma"
assert result[1] == "HalleBerryIsGone"
def test_i_cannot_match_unrelated_subsequence(self):
# "xyz" should not match any file
choices = ["AlphaBetaGamma"]
result = subsequence_matching("xyz", choices)
assert len(result) == 0
def test_i_can_match_on_object(self):
@dataclass
class DummyObject:
value: str
id: str
choices = [
DummyObject("HalleBerryIsGone", "1"),
DummyObject("AlphaBetaGamma", "2"),
DummyObject("xyz", "3"),
]
result = subsequence_matching("abg", choices, get_attr=lambda x: x.value)
assert len(result) == 2
assert result == [DummyObject("AlphaBetaGamma", "2"), DummyObject("HalleBerryIsGone", "1")]