# MyFastHtml/tests/core/test_matching_utils.py

from dataclasses import dataclass

from myfasthtml.core.matching_utils import fuzzy_matching, subsequence_matching


class TestFuzzyMatching:
  """Tests for ``fuzzy_matching``: thresholding, ordering and ``get_attr`` support."""

  def test_i_can_find_exact_match_with_fuzzy(self):
    """A query identical to the only choice is returned."""
    choices = ["hello"]
    result = fuzzy_matching("hello", choices)
    assert len(result) == 1
    assert result[0] == "hello"

  def test_i_can_find_close_match_with_fuzzy(self):
    """The misspelled query "helo" still matches "hello" with a 0.7 threshold."""
    choices = ["hello"]
    result = fuzzy_matching("helo", choices, similarity_threshold=0.7)
    assert len(result) == 1
    assert result[0] == "hello"

  def test_i_cannot_find_dissimilar_match_with_fuzzy(self):
    """The query "world" is rejected against "hello" with a 0.7 threshold."""
    choices = ["hello"]
    result = fuzzy_matching("world", choices, similarity_threshold=0.7)
    assert len(result) == 0

  def test_i_can_sort_by_similarity_in_fuzzy(self):
    """With the query "hello", the choice "hello" ranks before "helo"."""
    # Both input orders are exercised: an input that is already in the
    # expected order would pass even if no sorting happened at all.
    expected = ["hello", "helo"]
    for choices in (["hello", "helo"], ["helo", "hello"]):
      result = fuzzy_matching("hello", choices, similarity_threshold=0.7)
      assert result == expected

  def test_i_can_find_on_object(self):
    """``get_attr`` selects the text to compare; the matching objects are returned."""

    @dataclass
    class DummyObject:
      value: str
      id: str

    # The choices are deliberately not in the expected output order, and
    # "xyz" is expected to be filtered out.
    choices = [
        DummyObject("helo", "1"),
        DummyObject("hello", "2"),
        DummyObject("xyz", "3"),
    ]
    result = fuzzy_matching("hello", choices, get_attr=lambda x: x.value)
    assert len(result) == 2
    # The dataclass-generated __eq__ compares field by field.
    assert result == [DummyObject("hello", "2"), DummyObject("helo", "1")]


class TestSubsequenceMatching:
  """Tests for ``subsequence_matching``: letter order, ranking and ``get_attr`` support."""

  def test_i_can_match_subsequence_simple(self):
    """The lower-case query "abg" matches the mixed-case "AlphaBetaGamma"."""
    choices = ["AlphaBetaGamma"]
    result = subsequence_matching("abg", choices)
    assert len(result) == 1
    assert result[0] == "AlphaBetaGamma"

  def test_i_can_match_subsequence_simple_case_insensitive(self):
    """The query "abg" also matches the all-lower-case "alphabetagamma"."""
    choices = ["alphabetagamma"]
    result = subsequence_matching("abg", choices)
    assert len(result) == 1
    assert result[0] == "alphabetagamma"

  def test_i_cannot_match_wrong_order_subsequence(self):
    """The query "gba" has the right letters in the wrong order, so nothing matches."""
    choices = ["AlphaBetaGamma"]
    result = subsequence_matching("gba", choices)
    assert len(result) == 0

  def test_i_can_match_multiple_documents_subsequence(self):
    """"abg" matches both choices, and "AlphaBetaGamma" is ranked first."""
    # Both input orders are exercised: an input that is already in the
    # expected order would pass even if the results were never ranked.
    for choices in (
        ["AlphaBetaGamma", "HalleBerryIsGone"],
        ["HalleBerryIsGone", "AlphaBetaGamma"],
    ):
      result = subsequence_matching("abg", choices)
      assert len(result) == 2
      assert result[0] == "AlphaBetaGamma"
      assert result[1] == "HalleBerryIsGone"

  def test_i_cannot_match_unrelated_subsequence(self):
    """The query "xyz" does not match "AlphaBetaGamma"."""
    choices = ["AlphaBetaGamma"]
    result = subsequence_matching("xyz", choices)
    assert len(result) == 0

  def test_i_can_match_on_object(self):
    """``get_attr`` selects the text to match; the matching objects are returned."""

    @dataclass
    class DummyObject:
      value: str
      id: str

    # The choices are deliberately not in the expected output order, and
    # the "xyz" object is expected to be filtered out for the query "abg".
    choices = [
        DummyObject("HalleBerryIsGone", "1"),
        DummyObject("AlphaBetaGamma", "2"),
        DummyObject("xyz", "3"),
    ]

    result = subsequence_matching("abg", choices, get_attr=lambda x: x.value)
    assert len(result) == 2
    # The dataclass-generated __eq__ compares field by field.
    assert result == [DummyObject("AlphaBetaGamma", "2"), DummyObject("HalleBerryIsGone", "1")]