Implemented default pipeline
This commit is contained in:
0
tests/utils/__init__.py
Normal file
0
tests/utils/__init__.py
Normal file
89
tests/utils/test_document_matching.py
Normal file
89
tests/utils/test_document_matching.py
Normal file
@@ -0,0 +1,89 @@
|
||||
import os
|
||||
from datetime import datetime
|
||||
|
||||
import pytest
|
||||
from app.models.document import FileDocument, FileType
|
||||
from app.utils.document_matching import fuzzy_matching, subsequence_matching
|
||||
|
||||
|
||||
def get_doc(filename: str = None):
    """Build a sample FileDocument for the matching tests.

    Args:
        filename: Name of the fake file. When omitted (``None``), the
            placeholder ``"sample.txt"`` is used so the helper can be
            called without arguments.

    Returns:
        A FileDocument whose ``filename``, ``filepath`` and ``file_type``
        derive from *filename*; every other field is a fixed placeholder.
    """
    if filename is None:
        # A None filename would make os.path.splitext raise TypeError, so
        # substitute a usable placeholder name.
        filename = "sample.txt"

    # Extension without its leading dot; names without one map to "txt".
    # NOTE(review): FileType presumably rejects extensions it does not
    # know -- confirm before using exotic filenames here.
    extension = os.path.splitext(filename)[1].lstrip(".") or "txt"

    return FileDocument(
        filename=filename,
        filepath=f"/path/to/{filename}",
        file_hash="a1b2c3d4e5f6789012345678901234567890abcdef1234567890abcdef123456",
        file_type=FileType(extension),
        detected_at=datetime.now(),
        file_size=1024,
        # Constant placeholder: it is not derived from the extension.
        mime_type="application/pdf",
    )
|
||||
|
||||
|
||||
class TestFuzzyMatching:
    """Tests for ``fuzzy_matching`` over small lists of sample documents."""

    def test_i_can_find_exact_match_with_fuzzy(self):
        """An identical filename is returned as the single match."""
        candidates = [get_doc(filename="hello.txt")]

        matches = fuzzy_matching("hello.txt", candidates)

        # Exactly one hit, and it is the document we searched for.
        assert [doc.filename for doc in matches] == ["hello.txt"]

    def test_i_can_find_close_match_with_fuzzy(self):
        """A one-letter typo ("helo.txt") still finds "hello.txt" at 0.7."""
        candidates = [get_doc(filename="hello.txt")]

        matches = fuzzy_matching(
            "helo.txt",
            candidates,
            similarity_threshold=0.7,
        )

        assert [doc.filename for doc in matches] == ["hello.txt"]

    def test_i_cannot_find_dissimilar_match_with_fuzzy(self):
        """An unrelated name ("world.txt") yields no match at 0.7."""
        candidates = [get_doc(filename="hello.txt")]

        matches = fuzzy_matching(
            "world.txt",
            candidates,
            similarity_threshold=0.7,
        )

        assert len(matches) == 0

    def test_i_can_sort_by_similarity_in_fuzzy(self):
        """The closest filename is ranked first in the result."""
        # "hello.txt" is expected to be nearer to the query than "hllll.txt".
        candidates = [
            get_doc(filename=name) for name in ("hello.txt", "hllll.txt")
        ]

        ranked = fuzzy_matching(
            "helo.txt",
            candidates,
            similarity_threshold=0.5,
        )

        best = ranked[0]
        assert best.filename == "hello.txt"
|
||||
|
||||
|
||||
class TestSubsequenceMatching:
    """Tests for ``subsequence_matching`` over sample documents."""

    def test_i_can_match_subsequence_simple(self):
        """The initials "ifb" match "ilFaitBeau.txt"."""
        docs = [get_doc(filename="ilFaitBeau.txt")]

        result = subsequence_matching("ifb", docs)

        assert len(result) == 1
        assert result[0].filename == "ilFaitBeau.txt"

    def test_i_cannot_match_wrong_order_subsequence(self):
        """Letters that only occur in a different order do not match."""
        # "bfi" is used on purpose: no "f" follows the only "b" in
        # "ilFaitBeau.txt". The query "fib" would NOT serve here, since
        # F, the second i and B do appear in that order once letter case
        # is ignored (as the other tests in this class expect).
        docs = [get_doc(filename="ilFaitBeau.txt")]

        result = subsequence_matching("bfi", docs)

        assert len(result) == 0

    def test_i_can_match_multiple_documents_subsequence(self):
        """Both filenames match "ifb"; "ilFaitBeau.txt" is expected first."""
        docs = [
            get_doc(filename="ilFaitBeau.txt"),
            get_doc(filename="information_base.txt"),
        ]

        result = subsequence_matching("ifb", docs)

        assert len(result) == 2
        assert result[0].filename == "ilFaitBeau.txt"
        assert result[1].filename == "information_base.txt"

    def test_i_cannot_match_unrelated_subsequence(self):
        """A query whose letters are absent ("xyz") matches nothing."""
        docs = [get_doc(filename="ilFaitBeau.txt")]

        result = subsequence_matching("xyz", docs)

        assert len(result) == 0

    def test_i_can_handle_case_insensitivity_in_subsequence(self):
        """A lowercase query ("hw") matches the mixed-case "HelloWorld.txt"."""
        docs = [get_doc(filename="HelloWorld.txt")]

        result = subsequence_matching("hw", docs)

        assert len(result) == 1
        assert result[0].filename == "HelloWorld.txt"
|
||||
105
tests/utils/test_security.py
Normal file
105
tests/utils/test_security.py
Normal file
@@ -0,0 +1,105 @@
|
||||
"""
|
||||
Unit tests for password security utilities.
|
||||
|
||||
Tests the bcrypt-based password hashing and verification functions
|
||||
including edge cases and error handling.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from app.utils.security import hash_password, verify_password
|
||||
|
||||
|
||||
def test_i_can_hash_password():
    """Hashing returns a non-empty, "$2b$"-prefixed str unlike the input."""
    plain_text = "my_secure_password"

    digest = hash_password(plain_text)

    # The hash must not simply echo the password back.
    assert digest != plain_text

    # The result is a string with actual content.
    assert isinstance(digest, str)
    assert len(digest) > 0

    # The "$2b$" prefix is the bcrypt identifier this suite expects.
    assert digest.startswith("$2b$")
|
||||
|
||||
|
||||
def test_same_password_generates_different_hashes():
    """Hashing the same password twice yields two distinct bcrypt hashes."""
    plain_text = "identical_password"

    first_digest = hash_password(plain_text)
    second_digest = hash_password(plain_text)

    # Salting is expected to make the two results differ.
    assert first_digest != second_digest

    # Each result must still carry the bcrypt "$2b$" prefix.
    for digest in (first_digest, second_digest):
        assert digest.startswith("$2b$")
|
||||
|
||||
|
||||
def test_i_can_verify_correct_password():
    """The password that produced a hash verifies against that hash."""
    plain_text = "correct_password"
    digest = hash_password(plain_text)

    outcome = verify_password(plain_text, digest)

    # Strict identity check: the function must return the bool True.
    assert outcome is True
|
||||
|
||||
|
||||
def test_i_cannot_verify_incorrect_password():
    """A different password does not verify against the stored hash."""
    digest = hash_password("correct_password")

    outcome = verify_password("wrong_password", digest)

    # Strict identity check: the function must return the bool False.
    assert outcome is False
|
||||
|
||||
|
||||
def test_i_cannot_hash_empty_password():
    """Hashing an empty string or None raises ValueError."""
    expected_message = "Password cannot be empty or None"

    # The empty string is checked first, then None.
    for rejected in ("", None):
        with pytest.raises(ValueError, match=expected_message):
            hash_password(rejected)
|
||||
|
||||
|
||||
def test_i_cannot_verify_with_malformed_hash():
    """Verifying against a string that is not a bcrypt hash raises RuntimeError."""
    bogus_hash = "not_a_valid_bcrypt_hash"

    # The error message is expected to mention the invalid hash format.
    with pytest.raises(RuntimeError, match="Invalid hash format"):
        verify_password("test_password", bogus_hash)
|
||||
|
||||
|
||||
def test_i_cannot_verify_with_none_values():
    """None or empty arguments to verify_password raise ValueError."""
    plain_text = "test_password"
    digest = hash_password(plain_text)
    expected_message = "Password and hashed_password cannot be empty or None"

    # (password, hashed_password) pairs that must all be rejected.
    rejected_pairs = (
        (None, digest),      # missing password
        (plain_text, None),  # missing hash
        (None, None),        # both missing
        ("", digest),        # empty password
        (plain_text, ""),    # empty hash
    )

    for candidate, stored in rejected_pairs:
        with pytest.raises(ValueError, match=expected_message):
            verify_password(candidate, stored)
|
||||
Reference in New Issue
Block a user