105 lines
3.3 KiB
Python
105 lines
3.3 KiB
Python
from dataclasses import dataclass
|
|
|
|
from myfasthtml.core.matching_utils import fuzzy_matching, subsequence_matching
|
|
|
|
|
|
class TestFuzzyMatching:
    """Tests for ``fuzzy_matching`` on plain strings and on objects.

    Every test passes a query plus a list of candidates and checks which
    candidates come back, and in which order.
    """

    def test_i_can_find_exact_match_with_fuzzy(self):
        # A query identical to the only candidate must be returned.
        choices = ["hello"]
        result = fuzzy_matching("hello", choices)
        assert len(result) == 1
        assert result[0] == "hello"

    def test_i_can_find_close_match_with_fuzzy(self):
        # "helo" is expected to be close enough to "hello" to pass a
        # 0.7 similarity threshold.
        choices = ["hello"]
        result = fuzzy_matching("helo", choices, similarity_threshold=0.7)
        assert len(result) == 1
        assert result[0] == "hello"

    def test_i_cannot_find_dissimilar_match_with_fuzzy(self):
        # "world" is expected to fall below the 0.7 threshold for "hello".
        choices = ["hello"]
        result = fuzzy_matching("world", choices, similarity_threshold=0.7)
        assert len(result) == 0

    def test_i_can_sort_by_similarity_in_fuzzy(self):
        # "hello" is an exact match for the query, so it must be ranked
        # ahead of "helo". The weaker candidate is listed first on purpose:
        # if the input were already in the expected order, the assertion
        # would also pass when no sorting happens at all.
        choices = [
            "helo",
            "hello",
        ]
        result = fuzzy_matching("hello", choices, similarity_threshold=0.7)
        assert result == ["hello", "helo"]

    def test_i_can_find_on_object(self):
        # Candidates are objects; ``get_attr`` tells fuzzy_matching which
        # string to compare. The matching objects themselves are returned.
        @dataclass
        class DummyObject:
            value: str
            id: str

        choices = [
            DummyObject("helo", "1"),
            DummyObject("hello", "2"),
            DummyObject("xyz", "3"),
        ]
        result = fuzzy_matching("hello", choices, get_attr=lambda x: x.value)

        # "xyz" is expected to be rejected (default threshold), and the
        # exact match must come before the close one.
        assert len(result) == 2
        assert result == [DummyObject("hello", "2"), DummyObject("helo", "1")]
|
|
|
|
|
|
class TestSubsequenceMatching:
    """Tests for ``subsequence_matching`` on plain strings and on objects.

    Every test passes a short query plus a list of candidates and checks
    which candidates come back, and in which order.
    """

    def test_i_can_match_subsequence_simple(self):
        # "abg" should match "AlphaBetaGamma"
        choices = ["AlphaBetaGamma"]
        result = subsequence_matching("abg", choices)
        assert len(result) == 1
        assert result[0] == "AlphaBetaGamma"

    def test_i_can_match_subsequence_simple_case_insensitive(self):
        # "abg" should match the all-lowercase "alphabetagamma" as well
        choices = ["alphabetagamma"]
        result = subsequence_matching("abg", choices)
        assert len(result) == 1
        assert result[0] == "alphabetagamma"

    def test_i_cannot_match_wrong_order_subsequence(self):
        # The letters of "gba" are expected not to match in that order.
        choices = ["AlphaBetaGamma"]
        result = subsequence_matching("gba", choices)
        assert len(result) == 0

    def test_i_can_match_multiple_documents_subsequence(self):
        # "abg" should match both candidates, with "AlphaBetaGamma" ranked
        # first. The lower-ranked candidate is listed first on purpose: if
        # the input were already in the expected order, the assertions
        # would also pass when no ranking happens at all.
        choices = [
            "HalleBerryIsGone",
            "AlphaBetaGamma",
        ]
        result = subsequence_matching("abg", choices)
        assert len(result) == 2
        assert result[0] == "AlphaBetaGamma"
        assert result[1] == "HalleBerryIsGone"

    def test_i_cannot_match_unrelated_subsequence(self):
        # "xyz" should not match the candidate
        choices = ["AlphaBetaGamma"]
        result = subsequence_matching("xyz", choices)
        assert len(result) == 0

    def test_i_can_match_on_object(self):
        # Candidates are objects; ``get_attr`` tells subsequence_matching
        # which string to compare. The matching objects themselves are
        # returned.
        @dataclass
        class DummyObject:
            value: str
            id: str

        choices = [
            DummyObject("HalleBerryIsGone", "1"),
            DummyObject("AlphaBetaGamma", "2"),
            DummyObject("xyz", "3"),
        ]

        result = subsequence_matching("abg", choices, get_attr=lambda x: x.value)

        # "xyz" is expected to be rejected, and "AlphaBetaGamma" must be
        # ranked ahead of "HalleBerryIsGone".
        assert len(result) == 2
        assert result == [DummyObject("AlphaBetaGamma", "2"), DummyObject("HalleBerryIsGone", "1")]