Implemented default pipeline

This commit is contained in:
2025-09-26 22:08:39 +02:00
parent f1b551d243
commit 4de732b0ae
56 changed files with 4534 additions and 2837 deletions

View File

View File

@@ -0,0 +1,611 @@
"""
Test suite for FileDocumentRepository with async/support.
This module contains comprehensive tests for all FileDocumentRepository methods
using mongomock-motor for in-memory MongoDB testing.
"""
from datetime import datetime
import pytest
from bson import ObjectId
from mongomock.mongo_client import MongoClient
from pymongo.errors import PyMongoError
from app.database.repositories.document_repository import (
FileDocumentRepository,
MatchMethodBase,
SubsequenceMatching,
FuzzyMatching
)
from app.models.document import FileDocument, FileType, ExtractionMethod
@pytest.fixture
def in_memory_repository():
"""Create an in-memory FileDocumentRepository for testing."""
client = MongoClient()
db = client.test_database
repo = FileDocumentRepository(db)
repo.initialize()
return repo
@pytest.fixture
def sample_file_document():
"""Sample FileDocument data for testing."""
return FileDocument(
filename="sample_document.pdf",
filepath="/home/user/documents/sample_document.pdf",
file_type=FileType.PDF,
extraction_method=ExtractionMethod.OCR,
metadata={"pages": 5, "language": "en", "author": "John Doe"},
detected_at=datetime.now(),
file_hash="a1b2c3d4e5f6789012345678901234567890abcdef1234567890abcdef123456",
encoding="utf-8",
file_size=1024000,
mime_type="application/pdf"
)
@pytest.fixture
def sample_update_data():
"""Sample update data for testing."""
return {
"extraction_method": ExtractionMethod.HYBRID,
"metadata": {"pages": 10, "language": "fr", "updated": True},
"file_size": 2048000
}
@pytest.fixture
def multiple_sample_files():
"""Multiple FileDocument objects for list/search testing."""
base_time = datetime.now()
return [
FileDocument(
filename="first_doc.txt",
filepath="/docs/first_doc.txt",
file_type=FileType.TXT,
extraction_method=ExtractionMethod.DIRECT_TEXT,
metadata={"words": 500},
detected_at=base_time,
file_hash="hash1" + "0" * 58,
encoding="utf-8",
file_size=5000,
mime_type="text/plain"
),
FileDocument(
filename="second_document.pdf",
filepath="/docs/second_document.pdf",
file_type=FileType.PDF,
extraction_method=ExtractionMethod.OCR,
metadata={"pages": 8},
detected_at=base_time,
file_hash="hash2" + "0" * 58,
encoding="utf-8",
file_size=10000,
mime_type="application/pdf"
),
FileDocument(
filename="third_file.docx",
filepath="/docs/third_file.docx",
file_type=FileType.DOCX,
extraction_method=ExtractionMethod.HYBRID,
metadata={"paragraphs": 15},
detected_at=base_time,
file_hash="hash3" + "0" * 58,
encoding="utf-8",
file_size=15000,
mime_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
)
]
class TestFileDocumentRepositoryInitialization:
"""Tests for repository initialization."""
def test_i_can_initialize_repository(self):
"""Test repository initialization."""
# Arrange
client = MongoClient()
db = client.test_database
repo = FileDocumentRepository(db)
repo.initialize()
# Act & Assert (should not raise any exception)
assert repo.db is not None
assert repo.collection is not None
# TODO : check that the indexes are created
class TestFileDocumentRepositoryCreation:
"""Tests for file document creation functionality."""
def test_i_can_create_file_document(self, in_memory_repository, sample_file_document):
"""Test successful file document creation."""
# Act
created_file = in_memory_repository.create_document(sample_file_document)
# Assert
assert created_file is not None
assert created_file.filename == sample_file_document.filename
assert created_file.filepath == sample_file_document.filepath
assert created_file.file_type == sample_file_document.file_type
assert created_file.extraction_method == sample_file_document.extraction_method
assert created_file.metadata == sample_file_document.metadata
assert created_file.file_hash == sample_file_document.file_hash
assert created_file.file_size == sample_file_document.file_size
assert created_file.mime_type == sample_file_document.mime_type
assert created_file.id is not None
assert isinstance(created_file.id, ObjectId)
def test_i_can_create_file_document_without_id(self, in_memory_repository, sample_file_document):
"""Test creating file document with _id set to None (should be removed)."""
# Arrange
sample_file_document.id = None
# Act
created_file = in_memory_repository.create_document(sample_file_document)
# Assert
assert created_file is not None
assert created_file.id is not None
assert isinstance(created_file.id, ObjectId)
def test_i_cannot_create_file_document_with_pymongo_error(self, in_memory_repository,
sample_file_document, mocker):
"""Test handling of PyMongo errors during file document creation."""
# Arrange
mocker.patch.object(in_memory_repository.collection, 'insert_one', side_effect=PyMongoError("Database error"))
# Act & Assert
with pytest.raises(ValueError) as exc_info:
in_memory_repository.create_document(sample_file_document)
assert "Failed to create file document" in str(exc_info.value)
class TestFileDocumentRepositoryFinding:
"""Tests for file document finding functionality."""
def test_i_can_find_document_by_valid_id(self, in_memory_repository, sample_file_document):
"""Test finding file document by valid ObjectId."""
# Arrange
created_file = in_memory_repository.create_document(sample_file_document)
# Act
found_file = in_memory_repository.find_document_by_id(str(created_file.id))
# Assert
assert found_file is not None
assert found_file.id == created_file.id
assert found_file.filename == created_file.filename
assert found_file.filepath == created_file.filepath
def test_i_cannot_find_document_with_invalid_id(self, in_memory_repository):
"""Test that invalid ObjectId returns None."""
# Act
found_file = in_memory_repository.find_document_by_id("invalid_id")
# Assert
assert found_file is None
def test_i_cannot_find_document_by_nonexistent_id(self, in_memory_repository):
"""Test that nonexistent but valid ObjectId returns None."""
# Arrange
nonexistent_id = str(ObjectId())
# Act
found_file = in_memory_repository.find_document_by_id(nonexistent_id)
# Assert
assert found_file is None
def test_i_can_find_document_by_file_hash(self, in_memory_repository, sample_file_document):
"""Test finding file document by file hash."""
# Arrange
created_file = in_memory_repository.create_document(sample_file_document)
# Act
found_file = in_memory_repository.find_document_by_hash(sample_file_document.file_hash)
# Assert
assert found_file is not None
assert found_file.file_hash == created_file.file_hash
assert found_file.id == created_file.id
def test_i_cannot_find_document_with_nonexistent_file_hash(self, in_memory_repository):
"""Test that nonexistent file hash returns None."""
# Act
found_file = in_memory_repository.find_document_by_hash("nonexistent_hash")
# Assert
assert found_file is None
def test_i_can_find_document_by_filepath(self, in_memory_repository, sample_file_document):
"""Test finding file document by filepath."""
# Arrange
created_file = in_memory_repository.create_document(sample_file_document)
# Act
found_file = in_memory_repository.find_document_by_filepath(sample_file_document.filepath)
# Assert
assert found_file is not None
assert found_file.filepath == created_file.filepath
assert found_file.id == created_file.id
def test_i_cannot_find_document_with_nonexistent_filepath(self, in_memory_repository):
"""Test that nonexistent filepath returns None."""
# Act
found_file = in_memory_repository.find_document_by_filepath("/nonexistent/path/file.pdf")
# Assert
assert found_file is None
def test_i_cannot_find_document_with_pymongo_error(self, in_memory_repository, mocker):
"""Test handling of PyMongo errors during file document finding."""
# Arrange
mocker.patch.object(in_memory_repository.collection, 'find_one', side_effect=PyMongoError("Database error"))
# Act
found_file = in_memory_repository.find_document_by_hash("test_hash")
# Assert
assert found_file is None
class TestFileDocumentRepositoryNameMatching:
"""Tests for file document name matching functionality."""
def test_i_can_find_documents_by_name_with_fuzzy_matching(self, in_memory_repository, multiple_sample_files):
"""Test finding file documents by filename using fuzzy matching."""
# Arrange
for file_doc in multiple_sample_files:
in_memory_repository.create_document(file_doc)
# Act
fuzzy_method = FuzzyMatching(threshold=0.5)
found_files = in_memory_repository.find_document_by_name("document", fuzzy_method)
# Assert
assert len(found_files) >= 1
assert all(isinstance(file_doc, FileDocument) for file_doc in found_files)
# Should find files with "document" in the name
found_filenames = [f.filename for f in found_files]
assert any("document" in fname.lower() for fname in found_filenames)
def test_i_can_find_documents_by_name_with_subsequence_matching(self, in_memory_repository,
multiple_sample_files):
"""Test finding file documents by filename using subsequence matching."""
# Arrange
for file_doc in multiple_sample_files:
in_memory_repository.create_document(file_doc)
# Act
subsequence_method = SubsequenceMatching()
found_files = in_memory_repository.find_document_by_name("doc", subsequence_method)
# Assert
assert len(found_files) >= 1
assert all(isinstance(file_doc, FileDocument) for file_doc in found_files)
def test_i_can_find_documents_by_name_with_default_method(self, in_memory_repository, multiple_sample_files):
"""Test finding file documents by filename with default matching method."""
# Arrange
for file_doc in multiple_sample_files:
in_memory_repository.create_document(file_doc)
# Act
found_files = in_memory_repository.find_document_by_name("first")
# Assert
assert len(found_files) >= 0
assert all(isinstance(file_doc, FileDocument) for file_doc in found_files)
def test_i_cannot_find_documents_by_name_with_pymongo_error(self, in_memory_repository, mocker):
"""Test handling of PyMongo errors during document name matching."""
# Arrange
mocker.patch.object(in_memory_repository.collection, 'find', side_effect=PyMongoError("Database error"))
# Act
found_files = in_memory_repository.find_document_by_name("test")
# Assert
assert found_files == []
class TestFileDocumentRepositoryListing:
"""Tests for file document listing functionality."""
def test_i_can_list_documents_with_default_pagination(self, in_memory_repository, multiple_sample_files):
"""Test listing file documents with default pagination."""
# Arrange
for file_doc in multiple_sample_files:
in_memory_repository.create_document(file_doc)
# Act
files = in_memory_repository.list_documents()
# Assert
assert len(files) == len(multiple_sample_files)
assert all(isinstance(file_doc, FileDocument) for file_doc in files)
def test_i_can_list_documents_with_custom_pagination(self, in_memory_repository, multiple_sample_files):
"""Test listing file documents with custom pagination."""
# Arrange
for file_doc in multiple_sample_files:
in_memory_repository.create_document(file_doc)
# Act
files_page1 = in_memory_repository.list_documents(skip=0, limit=2)
files_page2 = in_memory_repository.list_documents(skip=2, limit=2)
# Assert
assert len(files_page1) == 2
assert len(files_page2) == 1 # Only 3 total files
# Ensure no overlap between pages
page1_ids = [file_doc.id for file_doc in files_page1]
page2_ids = [file_doc.id for file_doc in files_page2]
assert len(set(page1_ids).intersection(set(page2_ids))) == 0
def test_i_can_list_documents_sorted_by_detected_at(self, in_memory_repository, sample_file_document):
"""Test that file documents are sorted by detected_at in descending order."""
# Arrange
file1 = sample_file_document.model_copy()
file1.filepath = "/docs/file1.pdf"
file1.filename = "file1.pdf"
file1.file_hash = "hash1" + "0" * 58
file1.detected_at = datetime(2024, 1, 1, 10, 0, 0)
file2 = sample_file_document.model_copy()
file2.filepath = "/docs/file2.pdf"
file2.filename = "file2.pdf"
file2.file_hash = "hash2" + "0" * 58
file2.detected_at = datetime(2024, 1, 2, 10, 0, 0) # Later date
created_file1 = in_memory_repository.create_document(file1)
created_file2 = in_memory_repository.create_document(file2)
# Act
files = in_memory_repository.list_documents()
# Assert
assert len(files) == 2
# Most recent (latest detected_at) should be first
assert files[0].id == created_file2.id
assert files[1].id == created_file1.id
def test_i_can_list_empty_documents(self, in_memory_repository):
"""Test listing file documents from empty collection."""
# Act
files = in_memory_repository.list_documents()
# Assert
assert files == []
def test_i_cannot_list_documents_with_pymongo_error(self, in_memory_repository, mocker):
"""Test handling of PyMongo errors during file document listing."""
# Arrange
mocker.patch.object(in_memory_repository.collection, 'find', side_effect=PyMongoError("Database error"))
# Act
files = in_memory_repository.list_documents()
# Assert
assert files == []
class TestFileDocumentRepositoryUpdate:
"""Tests for file document update functionality."""
def test_i_can_update_document_successfully(self, in_memory_repository, sample_file_document,
sample_update_data):
"""Test successful file document update."""
# Arrange
created_file = in_memory_repository.create_document(sample_file_document)
# Act
updated_file = in_memory_repository.update_document(str(created_file.id), sample_update_data)
# Assert
assert updated_file is not None
assert updated_file.extraction_method == sample_update_data["extraction_method"]
assert updated_file.metadata == sample_update_data["metadata"]
assert updated_file.file_size == sample_update_data["file_size"]
assert updated_file.id == created_file.id
assert updated_file.filename == created_file.filename # Unchanged fields remain
assert updated_file.filepath == created_file.filepath
def test_i_can_update_document_with_partial_data(self, in_memory_repository, sample_file_document):
"""Test updating file document with partial data."""
# Arrange
created_file = in_memory_repository.create_document(sample_file_document)
partial_update = {"file_size": 999999}
# Act
updated_file = in_memory_repository.update_document(str(created_file.id), partial_update)
# Assert
assert updated_file is not None
assert updated_file.file_size == 999999
assert updated_file.filename == created_file.filename # Should remain unchanged
assert updated_file.metadata == created_file.metadata # Should remain unchanged
def test_i_can_update_document_filtering_none_values(self, in_memory_repository, sample_file_document):
"""Test that None values are filtered out from update data."""
# Arrange
created_file = in_memory_repository.create_document(sample_file_document)
update_with_none = {"file_size": 777777, "metadata": None}
# Act
updated_file = in_memory_repository.update_document(str(created_file.id), update_with_none)
# Assert
assert updated_file is not None
assert updated_file.file_size == 777777
assert updated_file.metadata == created_file.metadata # Should remain unchanged (None filtered out)
def test_i_can_update_document_with_empty_data(self, in_memory_repository, sample_file_document):
"""Test updating file document with empty data returns current document."""
# Arrange
created_file = in_memory_repository.create_document(sample_file_document)
empty_update = {}
# Act
result = in_memory_repository.update_document(str(created_file.id), empty_update)
# Assert
assert result is not None
assert result.filename == created_file.filename
assert result.filepath == created_file.filepath
assert result.metadata == created_file.metadata
def test_i_cannot_update_document_with_invalid_id(self, in_memory_repository, sample_update_data):
"""Test that updating with invalid ID returns None."""
# Act
result = in_memory_repository.update_document("invalid_id", sample_update_data)
# Assert
assert result is None
def test_i_cannot_update_nonexistent_document(self, in_memory_repository, sample_update_data):
"""Test that updating nonexistent file document returns None."""
# Arrange
nonexistent_id = str(ObjectId())
# Act
result = in_memory_repository.update_document(nonexistent_id, sample_update_data)
# Assert
assert result is None
def test_i_cannot_update_document_with_pymongo_error(self, in_memory_repository, sample_file_document,
sample_update_data, mocker):
"""Test handling of PyMongo errors during file document update."""
# Arrange
created_file = in_memory_repository.create_document(sample_file_document)
mocker.patch.object(in_memory_repository.collection, 'find_one_and_update',
side_effect=PyMongoError("Database error"))
# Act
result = in_memory_repository.update_document(str(created_file.id), sample_update_data)
# Assert
assert result is None
class TestFileDocumentRepositoryDeletion:
"""Tests for file document deletion functionality."""
def test_i_can_delete_existing_document(self, in_memory_repository, sample_file_document):
"""Test successful file document deletion."""
# Arrange
created_file = in_memory_repository.create_document(sample_file_document)
# Act
deletion_result = in_memory_repository.delete_document(str(created_file.id))
# Assert
assert deletion_result is True
# Verify document is actually deleted
found_file = in_memory_repository.find_document_by_id(str(created_file.id))
assert found_file is None
def test_i_cannot_delete_document_with_invalid_id(self, in_memory_repository):
"""Test that deleting with invalid ID returns False."""
# Act
result = in_memory_repository.delete_document("invalid_id")
# Assert
assert result is False
def test_i_cannot_delete_nonexistent_document(self, in_memory_repository):
"""Test that deleting nonexistent file document returns False."""
# Arrange
nonexistent_id = str(ObjectId())
# Act
result = in_memory_repository.delete_document(nonexistent_id)
# Assert
assert result is False
def test_i_cannot_delete_document_with_pymongo_error(self, in_memory_repository, sample_file_document, mocker):
"""Test handling of PyMongo errors during file document deletion."""
# Arrange
created_file = in_memory_repository.create_document(sample_file_document)
mocker.patch.object(in_memory_repository.collection, 'delete_one', side_effect=PyMongoError("Database error"))
# Act
result = in_memory_repository.delete_document(str(created_file.id))
# Assert
assert result is False
class TestFileDocumentRepositoryUtilities:
"""Tests for utility methods."""
def test_i_can_count_documents(self, in_memory_repository, sample_file_document):
"""Test counting file documents."""
# Arrange
initial_count = in_memory_repository.count_documents()
in_memory_repository.create_document(sample_file_document)
# Act
final_count = in_memory_repository.count_documents()
# Assert
assert final_count == initial_count + 1
def test_i_can_count_zero_documents(self, in_memory_repository):
"""Test counting file documents in empty collection."""
# Act
count = in_memory_repository.count_documents()
# Assert
assert count == 0
def test_i_cannot_count_documents_with_pymongo_error(self, in_memory_repository, mocker):
"""Test handling of PyMongo errors during file document counting."""
# Arrange
mocker.patch.object(in_memory_repository.collection, 'count_documents', side_effect=PyMongoError("Database error"))
# Act
count = in_memory_repository.count_documents()
# Assert
assert count == 0
class TestMatchingMethods:
"""Tests for matching method classes."""
def test_i_can_create_fuzzy_matching_with_default_threshold(self):
"""Test creating FuzzyMatching with default threshold."""
# Act
fuzzy = FuzzyMatching()
# Assert
assert fuzzy.threshold == 0.6
def test_i_can_create_fuzzy_matching_with_custom_threshold(self):
"""Test creating FuzzyMatching with custom threshold."""
# Act
fuzzy = FuzzyMatching(threshold=0.8)
# Assert
assert fuzzy.threshold == 0.8
def test_i_can_create_subsequence_matching(self):
"""Test creating SubsequenceMatching."""
# Act
subsequence = SubsequenceMatching()
# Assert
assert isinstance(subsequence, MatchMethodBase)
assert isinstance(subsequence, SubsequenceMatching)

View File

@@ -0,0 +1,496 @@
"""
Test suite for JobRepository with async/support.
This module contains comprehensive tests for all JobRepository methods
using mongomock-motor for in-memory MongoDB testing.
"""
from datetime import datetime
import pytest
from bson import ObjectId
from mongomock.mongo_client import MongoClient
from mongomock_motor import AsyncMongoMockClient
from pymongo.errors import PyMongoError
from app.database.repositories.job_repository import JobRepository
from app.exceptions.job_exceptions import JobRepositoryError
from app.models.job import ProcessingJob, ProcessingStatus
from app.models.types import PyObjectId
@pytest.fixture
def in_memory_repository():
"""Create an in-memory JobRepository for testing."""
client = MongoClient()
db = client.test_database
repo = JobRepository(db)
repo.initialize()
return repo
@pytest.fixture
def sample_document_id():
"""Sample document ObjectId for testing."""
return PyObjectId()
@pytest.fixture
def sample_task_id():
"""Sample Celery task ID for testing."""
return "celery-task-12345-abcde"
@pytest.fixture
def multiple_sample_jobs():
"""Multiple ProcessingJob objects for testing."""
doc_id_1 = ObjectId()
doc_id_2 = ObjectId()
base_time = datetime.utcnow()
return [
ProcessingJob(
document_id=doc_id_1,
status=ProcessingStatus.PENDING,
task_id="task-1",
created_at=base_time,
started_at=None,
completed_at=None,
error_message=None
),
ProcessingJob(
document_id=doc_id_2,
status=ProcessingStatus.PROCESSING,
task_id="task-2",
created_at=base_time,
started_at=base_time,
completed_at=None,
error_message=None
),
ProcessingJob(
document_id=doc_id_1,
status=ProcessingStatus.COMPLETED,
task_id="task-3",
created_at=base_time,
started_at=base_time,
completed_at=base_time,
error_message=None
)
]
class TestJobRepositoryInitialization:
"""Tests for repository initialization."""
def test_i_can_initialize_repository(self):
"""Test repository initialization."""
# Arrange
client = AsyncMongoMockClient()
db = client.test_database
repo = JobRepository(db)
# Act
initialized_repo = repo.initialize()
# Assert
assert initialized_repo is repo
assert repo.db is not None
assert repo.collection is not None
class TestJobRepositoryCreation:
"""Tests for job creation functionality."""
def test_i_can_create_job_with_task_id(self, in_memory_repository, sample_document_id, sample_task_id):
"""Test successful job creation with task ID."""
# Act
created_job = in_memory_repository.create_job(sample_document_id, sample_task_id)
# Assert
assert created_job is not None
assert created_job.document_id == sample_document_id
assert created_job.task_id == sample_task_id
assert created_job.status == ProcessingStatus.PENDING
assert created_job.created_at is not None
assert created_job.started_at is None
assert created_job.completed_at is None
assert created_job.error_message is None
assert created_job.id is not None
assert isinstance(created_job.id, ObjectId)
def test_i_can_create_job_without_task_id(self, in_memory_repository, sample_document_id):
"""Test successful job creation without task ID."""
# Act
created_job = in_memory_repository.create_job(sample_document_id)
# Assert
assert created_job is not None
assert created_job.document_id == sample_document_id
assert created_job.task_id is None
assert created_job.status == ProcessingStatus.PENDING
assert created_job.created_at is not None
assert created_job.started_at is None
assert created_job.completed_at is None
assert created_job.error_message is None
assert created_job.id is not None
assert isinstance(created_job.id, ObjectId)
def test_i_cannot_create_duplicate_job_for_document(self, in_memory_repository, sample_document_id,
sample_task_id):
"""Test that creating job with duplicate document_id raises DuplicateKeyError."""
# Arrange
in_memory_repository.create_job(sample_document_id, sample_task_id)
# Act & Assert
with pytest.raises(JobRepositoryError) as exc_info:
in_memory_repository.create_job(sample_document_id, "different-task-id")
assert "create_job" in str(exc_info.value)
def test_i_cannot_create_job_with_pymongo_error(self, in_memory_repository, sample_document_id, mocker):
"""Test handling of PyMongo errors during job creation."""
# Arrange
mocker.patch.object(in_memory_repository.collection, 'insert_one', side_effect=PyMongoError("Database error"))
# Act & Assert
with pytest.raises(JobRepositoryError) as exc_info:
in_memory_repository.create_job(sample_document_id)
assert "create_job" in str(exc_info.value)
class TestJobRepositoryFinding:
"""Tests for job finding functionality."""
def test_i_can_find_job_by_valid_id(self, in_memory_repository, sample_document_id, sample_task_id):
"""Test finding job by valid ObjectId."""
# Arrange
created_job = in_memory_repository.create_job(sample_document_id, sample_task_id)
# Act
found_job = in_memory_repository.find_job_by_id(created_job.id)
# Assert
assert found_job is not None
assert found_job.id == created_job.id
assert found_job.document_id == created_job.document_id
assert found_job.task_id == created_job.task_id
assert found_job.status == created_job.status
def test_i_cannot_find_job_by_nonexistent_id(self, in_memory_repository):
"""Test that nonexistent ObjectId returns None."""
# Arrange
nonexistent_id = PyObjectId()
# Act
found_job = in_memory_repository.find_job_by_id(nonexistent_id)
# Assert
assert found_job is None
def test_i_cannot_find_job_with_pymongo_error(self, in_memory_repository, mocker):
"""Test handling of PyMongo errors during job finding."""
# Arrange
mocker.patch.object(in_memory_repository.collection, 'find_one', side_effect=PyMongoError("Database error"))
# Act & Assert
with pytest.raises(JobRepositoryError) as exc_info:
in_memory_repository.find_job_by_id(PyObjectId())
assert "get_job_by_id" in str(exc_info.value)
def test_i_can_find_jobs_by_document_id(self, in_memory_repository, sample_document_id, sample_task_id):
"""Test finding jobs by document ID."""
# Arrange
created_job = in_memory_repository.create_job(sample_document_id, sample_task_id)
# Act
found_jobs = in_memory_repository.find_jobs_by_document_id(sample_document_id)
# Assert
assert len(found_jobs) == 1
assert found_jobs[0].id == created_job.id
assert found_jobs[0].document_id == sample_document_id
def test_i_can_find_empty_jobs_list_for_nonexistent_document(self, in_memory_repository):
"""Test that nonexistent document ID returns empty list."""
# Arrange
nonexistent_id = ObjectId()
# Act
found_jobs = in_memory_repository.find_jobs_by_document_id(nonexistent_id)
# Assert
assert found_jobs == []
def test_i_cannot_find_jobs_by_document_with_pymongo_error(self, in_memory_repository, mocker):
"""Test handling of PyMongo errors during finding jobs by document ID."""
# Arrange
mocker.patch.object(in_memory_repository.collection, 'find', side_effect=PyMongoError("Database error"))
# Act & Assert
with pytest.raises(JobRepositoryError) as exc_info:
in_memory_repository.find_jobs_by_document_id(PyObjectId())
assert "get_jobs_by_file_id" in str(exc_info.value)
@pytest.mark.parametrize("status", [
ProcessingStatus.PENDING,
ProcessingStatus.PROCESSING,
ProcessingStatus.COMPLETED
])
def test_i_can_find_jobs_by_pending_status(self, in_memory_repository, sample_document_id, status):
"""Test finding jobs by PENDING status."""
# Arrange
created_job = in_memory_repository.create_job(sample_document_id)
in_memory_repository.update_job_status(created_job.id, status)
# Act
found_jobs = in_memory_repository.get_jobs_by_status(status)
# Assert
assert len(found_jobs) == 1
assert found_jobs[0].id == created_job.id
assert found_jobs[0].status == status
def test_i_can_find_jobs_by_failed_status(self, in_memory_repository, sample_document_id):
"""Test finding jobs by FAILED status."""
# Arrange
created_job = in_memory_repository.create_job(sample_document_id)
in_memory_repository.update_job_status(created_job.id, ProcessingStatus.FAILED, "Test error")
# Act
found_jobs = in_memory_repository.get_jobs_by_status(ProcessingStatus.FAILED)
# Assert
assert len(found_jobs) == 1
assert found_jobs[0].id == created_job.id
assert found_jobs[0].status == ProcessingStatus.FAILED
assert found_jobs[0].error_message == "Test error"
def test_i_can_find_empty_jobs_list_for_unused_status(self, in_memory_repository):
"""Test that unused status returns empty list."""
# Act
found_jobs = in_memory_repository.get_jobs_by_status(ProcessingStatus.COMPLETED)
# Assert
assert found_jobs == []
def test_i_cannot_find_jobs_by_status_with_pymongo_error(self, in_memory_repository, mocker):
"""Test handling of PyMongo errors during finding jobs by status."""
# Arrange
mocker.patch.object(in_memory_repository.collection, 'find', side_effect=PyMongoError("Database error"))
# Act & Assert
with pytest.raises(JobRepositoryError) as exc_info:
in_memory_repository.get_jobs_by_status(ProcessingStatus.PENDING)
assert "get_jobs_by_status" in str(exc_info.value)
class TestJobRepositoryStatusUpdate:
"""Tests for job status update functionality."""
def test_i_can_update_job_status_to_processing(self, in_memory_repository, sample_document_id):
"""Test updating job status to PROCESSING with started_at timestamp."""
# Arrange
created_job = in_memory_repository.create_job(sample_document_id)
# Act
updated_job = in_memory_repository.update_job_status(created_job.id, ProcessingStatus.PROCESSING)
# Assert
assert updated_job is not None
assert updated_job.id == created_job.id
assert updated_job.status == ProcessingStatus.PROCESSING
assert updated_job.started_at is not None
assert updated_job.completed_at is None
assert updated_job.error_message is None
def test_i_can_update_job_status_to_completed(self, in_memory_repository, sample_document_id):
"""Test updating job status to COMPLETED with completed_at timestamp."""
# Arrange
created_job = in_memory_repository.create_job(sample_document_id)
in_memory_repository.update_job_status(created_job.id, ProcessingStatus.PROCESSING)
# Act
updated_job = in_memory_repository.update_job_status(created_job.id, ProcessingStatus.COMPLETED)
# Assert
assert updated_job is not None
assert updated_job.id == created_job.id
assert updated_job.status == ProcessingStatus.COMPLETED
assert updated_job.started_at is not None
assert updated_job.completed_at is not None
assert updated_job.error_message is None
def test_i_can_update_job_status_to_failed_with_error(self, in_memory_repository, sample_document_id):
"""Test updating job status to FAILED with error message and completed_at timestamp."""
# Arrange
created_job = in_memory_repository.create_job(sample_document_id)
error_message = "Processing failed due to invalid format"
# Act
updated_job = in_memory_repository.update_job_status(
created_job.id, ProcessingStatus.FAILED, error_message
)
# Assert
assert updated_job is not None
assert updated_job.id == created_job.id
assert updated_job.status == ProcessingStatus.FAILED
assert updated_job.completed_at is not None
assert updated_job.error_message == error_message
def test_i_can_update_job_status_to_failed_without_error(self, in_memory_repository, sample_document_id):
"""Test updating job status to FAILED without error message."""
# Arrange
created_job = in_memory_repository.create_job(sample_document_id)
# Act
updated_job = in_memory_repository.update_job_status(created_job.id, ProcessingStatus.FAILED)
# Assert
assert updated_job is not None
assert updated_job.id == created_job.id
assert updated_job.status == ProcessingStatus.FAILED
assert updated_job.completed_at is not None
assert updated_job.error_message is None
def test_i_cannot_update_nonexistent_job_status(self, in_memory_repository):
"""Test that updating nonexistent job returns None."""
# Arrange
nonexistent_id = ObjectId()
# Act
result = in_memory_repository.update_job_status(nonexistent_id, ProcessingStatus.COMPLETED)
# Assert
assert result is None
def test_i_cannot_update_job_status_with_pymongo_error(self, in_memory_repository, sample_document_id, mocker):
"""Test handling of PyMongo errors during job status update."""
# Arrange
created_job = in_memory_repository.create_job(sample_document_id)
mocker.patch.object(in_memory_repository.collection, 'find_one_and_update',
side_effect=PyMongoError("Database error"))
# Act & Assert
with pytest.raises(JobRepositoryError) as exc_info:
in_memory_repository.update_job_status(created_job.id, ProcessingStatus.COMPLETED)
assert "update_job_status" in str(exc_info.value)
class TestJobRepositoryDeletion:
"""Tests for job deletion functionality."""
def test_i_can_delete_existing_job(self, in_memory_repository, sample_document_id):
"""Test successful job deletion."""
# Arrange
created_job = in_memory_repository.create_job(sample_document_id)
# Act
deletion_result = in_memory_repository.delete_job(created_job.id)
# Assert
assert deletion_result is True
# Verify job is actually deleted
found_job = in_memory_repository.find_job_by_id(created_job.id)
assert found_job is None
def test_i_cannot_delete_nonexistent_job(self, in_memory_repository):
"""Test that deleting nonexistent job returns False."""
# Arrange
nonexistent_id = ObjectId()
# Act
result = in_memory_repository.delete_job(nonexistent_id)
# Assert
assert result is False
def test_i_cannot_delete_job_with_pymongo_error(self, in_memory_repository, sample_document_id, mocker):
"""Test handling of PyMongo errors during job deletion."""
# Arrange
created_job = in_memory_repository.create_job(sample_document_id)
mocker.patch.object(in_memory_repository.collection, 'delete_one', side_effect=PyMongoError("Database error"))
# Act & Assert
with pytest.raises(JobRepositoryError) as exc_info:
in_memory_repository.delete_job(created_job.id)
assert "delete_job" in str(exc_info.value)
class TestJobRepositoryComplexScenarios:
"""Tests for complex job repository scenarios."""
def test_i_can_handle_complete_job_lifecycle(self, in_memory_repository, sample_document_id, sample_task_id):
"""Test complete job lifecycle from creation to completion."""
# Create job
job = in_memory_repository.create_job(sample_document_id, sample_task_id)
assert job.status == ProcessingStatus.PENDING
assert job.started_at is None
assert job.completed_at is None
# Start processing
job = in_memory_repository.update_job_status(job.id, ProcessingStatus.PROCESSING)
assert job.status == ProcessingStatus.PROCESSING
assert job.started_at is not None
assert job.completed_at is None
# Complete job
job = in_memory_repository.update_job_status(job.id, ProcessingStatus.COMPLETED)
assert job.status == ProcessingStatus.COMPLETED
assert job.started_at is not None
assert job.completed_at is not None
assert job.error_message is None
def test_i_can_handle_job_failure_scenario(self, in_memory_repository, sample_document_id, sample_task_id):
"""Test job failure scenario with error message."""
# Create and start job
job = in_memory_repository.create_job(sample_document_id, sample_task_id)
job = in_memory_repository.update_job_status(job.id, ProcessingStatus.PROCESSING)
# Fail job with error
error_msg = "File format not supported"
job = in_memory_repository.update_job_status(job.id, ProcessingStatus.FAILED, error_msg)
# Assert failure state
assert job.status == ProcessingStatus.FAILED
assert job.started_at is not None
assert job.completed_at is not None
assert job.error_message == error_msg
def test_i_can_handle_multiple_documents_with_different_statuses(self, in_memory_repository):
"""Test managing multiple jobs for different documents with various statuses."""
# Create jobs for different documents
doc1 = PyObjectId()
doc2 = PyObjectId()
doc3 = PyObjectId()
job1 = in_memory_repository.create_job(doc1, "task-1")
job2 = in_memory_repository.create_job(doc2, "task-2")
job3 = in_memory_repository.create_job(doc3, "task-3")
# Update to different statuses
in_memory_repository.update_job_status(job1.id, ProcessingStatus.PROCESSING)
in_memory_repository.update_job_status(job2.id, ProcessingStatus.COMPLETED)
in_memory_repository.update_job_status(job3.id, ProcessingStatus.FAILED, "Error occurred")
# Verify status queries
pending_jobs = in_memory_repository.get_jobs_by_status(ProcessingStatus.PENDING)
processing_jobs = in_memory_repository.get_jobs_by_status(ProcessingStatus.PROCESSING)
completed_jobs = in_memory_repository.get_jobs_by_status(ProcessingStatus.COMPLETED)
failed_jobs = in_memory_repository.get_jobs_by_status(ProcessingStatus.FAILED)
assert len(pending_jobs) == 0
assert len(processing_jobs) == 1
assert len(completed_jobs) == 1
assert len(failed_jobs) == 1
assert processing_jobs[0].id == job1.id
assert completed_jobs[0].id == job2.id
assert failed_jobs[0].id == job3.id

View File

@@ -0,0 +1,279 @@
"""
Test suite for UserRepository with async/support.
This module contains comprehensive tests for all UserRepository methods
using mongomock-motor for in-memory MongoDB testing.
"""
import pytest
from bson import ObjectId
from mongomock.mongo_client import MongoClient
from pymongo.errors import DuplicateKeyError
from app.database.repositories.user_repository import UserRepository
from app.models.user import UserCreate, UserUpdate
@pytest.fixture
def in_memory_repository():
"""Create an in-memory UserRepository for testing."""
client = MongoClient()
db = client.test_database
repo = UserRepository(db)
repo.initialize()
return repo
@pytest.fixture
def sample_user_create():
"""Sample UserCreate data for testing."""
return UserCreate(
username="testuser",
email="test@example.com",
password="#TestPassword123",
role="user"
)
@pytest.fixture
def sample_user_update():
"""Sample UserUpdate data for testing."""
return UserUpdate(
username="updateduser",
email="updated@example.com",
role="admin"
)
class TestUserRepositoryCreation:
"""Tests for user creation functionality."""
def test_i_can_create_user(self, in_memory_repository, sample_user_create):
"""Test successful user creation."""
# Act
created_user = in_memory_repository.create_user(sample_user_create)
# Assert
assert created_user is not None
assert created_user.username == sample_user_create.username
assert created_user.email == sample_user_create.email
assert created_user.role == sample_user_create.role
assert created_user.is_active is True
assert created_user.id is not None
assert created_user.created_at is not None
assert created_user.updated_at is not None
assert created_user.hashed_password != sample_user_create.password # Should be hashed
def test_i_cannot_create_user_with_duplicate_username(self, in_memory_repository, sample_user_create):
"""Test that creating user with duplicate username raises DuplicateKeyError."""
# Arrange
in_memory_repository.create_user(sample_user_create)
# Act & Assert
with pytest.raises(DuplicateKeyError) as exc_info:
in_memory_repository.create_user(sample_user_create)
assert "already exists" in str(exc_info.value)
class TestUserRepositoryFinding:
"""Tests for user finding functionality."""
def test_i_can_find_user_by_id(self, in_memory_repository, sample_user_create):
"""Test finding user by valid ID."""
# Arrange
created_user = in_memory_repository.create_user(sample_user_create)
# Act
found_user = in_memory_repository.find_user_by_id(str(created_user.id))
# Assert
assert found_user is not None
assert found_user.id == created_user.id
assert found_user.username == created_user.username
assert found_user.email == created_user.email
def test_i_cannot_find_user_by_invalid_id(self, in_memory_repository):
"""Test that invalid ObjectId returns None."""
# Act
found_user = in_memory_repository.find_user_by_id("invalid_id")
# Assert
assert found_user is None
def test_i_cannot_find_user_by_nonexistent_id(self, in_memory_repository):
"""Test that nonexistent but valid ObjectId returns None."""
# Arrange
nonexistent_id = str(ObjectId())
# Act
found_user = in_memory_repository.find_user_by_id(nonexistent_id)
# Assert
assert found_user is None
def test_i_can_find_user_by_username(self, in_memory_repository, sample_user_create):
"""Test finding user by username."""
# Arrange
created_user = in_memory_repository.create_user(sample_user_create)
# Act
found_user = in_memory_repository.find_user_by_username(sample_user_create.username)
# Assert
assert found_user is not None
assert found_user.username == created_user.username
assert found_user.id == created_user.id
def test_i_cannot_find_user_by_nonexistent_username(self, in_memory_repository):
"""Test that nonexistent username returns None."""
# Act
found_user = in_memory_repository.find_user_by_username("nonexistent")
# Assert
assert found_user is None
def test_i_can_find_user_by_email(self, in_memory_repository, sample_user_create):
"""Test finding user by email."""
# Arrange
created_user = in_memory_repository.create_user(sample_user_create)
# Act
found_user = in_memory_repository.find_user_by_email(str(sample_user_create.email))
# Assert
assert found_user is not None
assert found_user.email == created_user.email
assert found_user.id == created_user.id
def test_i_cannot_find_user_by_nonexistent_email(self, in_memory_repository):
"""Test that nonexistent email returns None."""
# Act
found_user = in_memory_repository.find_user_by_email("nonexistent@example.com")
# Assert
assert found_user is None
class TestUserRepositoryUpdate:
"""Tests for user update functionality."""
def test_i_can_update_user(self, in_memory_repository, sample_user_create, sample_user_update):
"""Test successful user update."""
# Arrange
created_user = in_memory_repository.create_user(sample_user_create)
original_updated_at = created_user.updated_at
# Act
updated_user = in_memory_repository.update_user(str(created_user.id), sample_user_update)
# Assert
assert updated_user is not None
assert updated_user.username == sample_user_update.username
assert updated_user.email == sample_user_update.email
assert updated_user.role == sample_user_update.role
assert updated_user.id == created_user.id
def test_i_cannot_update_user_with_invalid_id(self, in_memory_repository, sample_user_update):
"""Test that updating with invalid ID returns None."""
# Act
result = in_memory_repository.update_user("invalid_id", sample_user_update)
# Assert
assert result is None
def test_i_can_update_user_with_partial_data(self, in_memory_repository, sample_user_create):
"""Test updating user with partial data."""
# Arrange
created_user = in_memory_repository.create_user(sample_user_create)
partial_update = UserUpdate(username="newusername")
# Act
updated_user = in_memory_repository.update_user(str(created_user.id), partial_update)
# Assert
assert updated_user is not None
assert updated_user.username == "newusername"
assert updated_user.email == created_user.email # Should remain unchanged
assert updated_user.role == created_user.role # Should remain unchanged
def test_i_can_update_user_with_empty_data(self, in_memory_repository, sample_user_create):
"""Test updating user with empty data returns current user."""
# Arrange
created_user = in_memory_repository.create_user(sample_user_create)
empty_update = UserUpdate()
# Act
result = in_memory_repository.update_user(str(created_user.id), empty_update)
# Assert
assert result is not None
assert result.username == created_user.username
assert result.email == created_user.email
class TestUserRepositoryDeletion:
"""Tests for user deletion functionality."""
def test_i_can_delete_user(self, in_memory_repository, sample_user_create):
"""Test successful user deletion."""
# Arrange
created_user = in_memory_repository.create_user(sample_user_create)
# Act
deletion_result = in_memory_repository.delete_user(str(created_user.id))
# Assert
assert deletion_result is True
# Verify user is actually deleted
found_user = in_memory_repository.find_user_by_id(str(created_user.id))
assert found_user is None
def test_i_cannot_delete_user_with_invalid_id(self, in_memory_repository):
"""Test that deleting with invalid ID returns False."""
# Act
result = in_memory_repository.delete_user("invalid_id")
# Assert
assert result is False
def test_i_cannot_delete_nonexistent_user(self, in_memory_repository):
"""Test that deleting nonexistent user returns False."""
# Arrange
nonexistent_id = str(ObjectId())
# Act
result = in_memory_repository.delete_user(nonexistent_id)
# Assert
assert result is False
class TestUserRepositoryUtilities:
"""Tests for utility methods."""
def test_i_can_count_users(self, in_memory_repository, sample_user_create):
"""Test counting users."""
# Arrange
initial_count = in_memory_repository.count_users()
in_memory_repository.create_user(sample_user_create)
# Act
final_count = in_memory_repository.count_users()
# Assert
assert final_count == initial_count + 1
def test_i_can_check_user_exists(self, in_memory_repository, sample_user_create):
"""Test checking if user exists."""
# Arrange
in_memory_repository.create_user(sample_user_create)
# Act
exists = in_memory_repository.user_exists(sample_user_create.username)
not_exists = in_memory_repository.user_exists("nonexistent")
# Assert
assert exists is True
assert not_exists is False