from dataclasses import dataclass

from myfasthtml.core.matching_utils import fuzzy_matching, subsequence_matching


class TestFuzzyMatching:
    """Tests for ``fuzzy_matching``: threshold filtering and result ordering."""

    def test_i_can_find_exact_match_with_fuzzy(self):
        """A query identical to the only candidate is returned."""
        candidates = ["hello"]

        matches = fuzzy_matching("hello", candidates)

        assert matches == ["hello"]

    def test_i_can_find_close_match_with_fuzzy(self):
        """"helo" is expected to match "hello" at a 0.7 threshold."""
        candidates = ["hello"]

        matches = fuzzy_matching("helo", candidates, similarity_threshold=0.7)

        assert matches == ["hello"]

    def test_i_cannot_find_dissimilar_match_with_fuzzy(self):
        """"world" is expected not to match "hello" at a 0.7 threshold."""
        candidates = ["hello"]

        matches = fuzzy_matching("world", candidates, similarity_threshold=0.7)

        assert matches == []

    def test_i_can_sort_by_similarity_in_fuzzy(self):
        """For the query "hello", "hello" is expected before "helo"."""
        candidates = ["hello", "helo"]

        matches = fuzzy_matching("hello", candidates, similarity_threshold=0.7)

        assert matches == ["hello", "helo"]

    def test_i_can_find_on_object(self):
        """Objects are matched through the string extracted by ``get_attr``."""

        @dataclass
        class DummyObject:
            value: str
            id: str

        helo = DummyObject("helo", "1")
        hello = DummyObject("hello", "2")
        unrelated = DummyObject("xyz", "3")

        matches = fuzzy_matching(
            "hello",
            [helo, hello, unrelated],
            get_attr=lambda obj: obj.value,
        )

        # Dataclass equality is field-based, so comparing against the input
        # instances is the same as comparing against freshly built ones.
        assert matches == [hello, helo]


class TestSubsequenceMatching:
    """Tests for ``subsequence_matching``: ordered matching and ranking."""

    def test_i_can_match_subsequence_simple(self):
        """"abg" is expected to match "AlphaBetaGamma"."""
        candidates = ["AlphaBetaGamma"]

        matches = subsequence_matching("abg", candidates)

        assert matches == ["AlphaBetaGamma"]

    def test_i_can_match_subsequence_simple_case_insensitive(self):
        """"abg" is expected to match the all-lowercase "alphabetagamma"."""
        candidates = ["alphabetagamma"]

        matches = subsequence_matching("abg", candidates)

        assert matches == ["alphabetagamma"]

    def test_i_cannot_match_wrong_order_subsequence(self):
        """"gba" has the letters in the wrong order, so nothing is expected."""
        candidates = ["AlphaBetaGamma"]

        matches = subsequence_matching("gba", candidates)

        assert matches == []

    def test_i_can_match_multiple_documents_subsequence(self):
        """Both candidates match "abg"; "AlphaBetaGamma" is expected first."""
        candidates = ["AlphaBetaGamma", "HalleBerryIsGone"]

        matches = subsequence_matching("abg", candidates)

        assert matches == ["AlphaBetaGamma", "HalleBerryIsGone"]

    def test_i_cannot_match_unrelated_subsequence(self):
        """"xyz" is expected not to match "AlphaBetaGamma"."""
        candidates = ["AlphaBetaGamma"]

        matches = subsequence_matching("xyz", candidates)

        assert matches == []

    def test_i_can_match_on_object(self):
        """Objects are matched through the string extracted by ``get_attr``."""

        @dataclass
        class DummyObject:
            value: str
            id: str

        halle = DummyObject("HalleBerryIsGone", "1")
        alpha = DummyObject("AlphaBetaGamma", "2")
        unrelated = DummyObject("xyz", "3")

        matches = subsequence_matching(
            "abg",
            [halle, alpha, unrelated],
            get_attr=lambda obj: obj.value,
        )

        # The expected order differs from the input order: alpha before halle.
        assert matches == [alpha, halle]