Added first controls
This commit is contained in:
105
tests/core/test_matching_utils.py
Normal file
105
tests/core/test_matching_utils.py
Normal file
@@ -0,0 +1,105 @@
|
||||
from dataclasses import dataclass
|
||||
|
||||
from myfasthtml.core.matching_utils import fuzzy_matching, subsequence_matching
|
||||
|
||||
|
||||
class TestFuzzyMatching:
    """Checks for ``fuzzy_matching`` on plain strings and on objects."""

    def test_i_can_find_exact_match_with_fuzzy(self):
        """A query identical to the only candidate is returned."""
        candidates = ["hello"]
        matches = fuzzy_matching("hello", candidates)
        assert matches == ["hello"]

    def test_i_can_find_close_match_with_fuzzy(self):
        """ "helo" is expected to match "hello" at a 0.7 threshold."""
        candidates = ["hello"]
        matches = fuzzy_matching("helo", candidates, similarity_threshold=0.7)
        assert matches == ["hello"]

    def test_i_cannot_find_dissimilar_match_with_fuzzy(self):
        """ "world" is expected not to match "hello" at a 0.7 threshold."""
        candidates = ["hello"]
        matches = fuzzy_matching("world", candidates, similarity_threshold=0.7)
        assert matches == []

    def test_i_can_sort_by_similarity_in_fuzzy(self):
        """The exact match "hello" is expected ahead of the near match "helo"."""
        candidates = ["hello", "helo"]
        matches = fuzzy_matching("hello", candidates, similarity_threshold=0.7)
        assert matches == ["hello", "helo"]

    def test_i_can_find_on_object(self):
        """With ``get_attr``, the compared text comes from ``value`` and the
        original objects are what the result is expected to contain."""

        @dataclass
        class DummyObject:
            value: str
            id: str

        near = DummyObject("helo", "1")
        exact = DummyObject("hello", "2")
        unrelated = DummyObject("xyz", "3")

        matches = fuzzy_matching(
            "hello",
            [near, exact, unrelated],
            get_attr=lambda x: x.value,
        )

        # Two hits expected: the exact one first, then the near one.
        assert matches == [DummyObject("hello", "2"), DummyObject("helo", "1")]
|
||||
|
||||
|
||||
class TestSubsequenceMatching:
    """Checks for ``subsequence_matching`` on plain strings and on objects."""

    def test_i_can_match_subsequence_simple(self):
        """ "abg" is expected to match "AlphaBetaGamma"."""
        candidates = ["AlphaBetaGamma"]
        matches = subsequence_matching("abg", candidates)
        assert matches == ["AlphaBetaGamma"]

    def test_i_can_match_subsequence_simple_case_insensitive(self):
        """ "abg" is expected to match the all-lowercase "alphabetagamma"."""
        candidates = ["alphabetagamma"]
        matches = subsequence_matching("abg", candidates)
        assert matches == ["alphabetagamma"]

    def test_i_cannot_match_wrong_order_subsequence(self):
        """ "gba" has the letters in the wrong order, so no match is expected."""
        candidates = ["AlphaBetaGamma"]
        matches = subsequence_matching("gba", candidates)
        assert matches == []

    def test_i_can_match_multiple_documents_subsequence(self):
        """Both candidates are expected to match "abg", with "AlphaBetaGamma"
        ranked ahead of "HalleBerryIsGone"."""
        candidates = ["AlphaBetaGamma", "HalleBerryIsGone"]
        matches = subsequence_matching("abg", candidates)
        assert matches == ["AlphaBetaGamma", "HalleBerryIsGone"]

    def test_i_cannot_match_unrelated_subsequence(self):
        """ "xyz" is expected not to match "AlphaBetaGamma"."""
        candidates = ["AlphaBetaGamma"]
        matches = subsequence_matching("xyz", candidates)
        assert matches == []

    def test_i_can_match_on_object(self):
        """With ``get_attr``, the compared text comes from ``value`` and the
        original objects are what the result is expected to contain."""

        @dataclass
        class DummyObject:
            value: str
            id: str

        second_best = DummyObject("HalleBerryIsGone", "1")
        best = DummyObject("AlphaBetaGamma", "2")
        unrelated = DummyObject("xyz", "3")

        matches = subsequence_matching(
            "abg",
            [second_best, best, unrelated],
            get_attr=lambda x: x.value,
        )

        # Two hits expected, in ranked order; the "xyz" object is excluded.
        assert matches == [
            DummyObject("AlphaBetaGamma", "2"),
            DummyObject("HalleBerryIsGone", "1"),
        ]
|
||||
Reference in New Issue
Block a user