"""
Test suite for JobRepository.

This module contains comprehensive tests for all JobRepository methods.
The repository is exercised through its synchronous API against an
in-memory MongoDB provided by mongomock.
"""

from datetime import datetime

import pytest
from bson import ObjectId
from mongomock.mongo_client import MongoClient
from mongomock_motor import AsyncMongoMockClient  # noqa: F401  (import kept; no longer used by the tests below)
from pymongo.errors import PyMongoError

from app.database.repositories.job_repository import JobRepository
from app.exceptions.job_exceptions import JobRepositoryError
from app.models.job import ProcessingJob, ProcessingStatus
from app.models.types import PyObjectId


@pytest.fixture
def in_memory_repository():
    """Create an initialized JobRepository backed by a mongomock database."""
    client = MongoClient()
    db = client.test_database
    repo = JobRepository(db)
    repo.initialize()
    return repo


@pytest.fixture
def sample_document_id():
    """Sample document ObjectId for testing."""
    return PyObjectId()


@pytest.fixture
def sample_task_id():
    """Sample Celery task ID for testing."""
    return "celery-task-12345-abcde"


@pytest.fixture
def multiple_sample_jobs():
    """
    Multiple ProcessingJob objects for testing.

    Returns three jobs (PENDING, PROCESSING, COMPLETED); the first and the
    third share the same document_id.
    """
    doc_id_1 = ObjectId()
    doc_id_2 = ObjectId()
    # NOTE(review): datetime.utcnow() is deprecated since Python 3.12. It is
    # kept here so the fixture still produces naive datetimes - confirm what
    # the model expects before switching to a timezone-aware value.
    base_time = datetime.utcnow()

    return [
        ProcessingJob(
            document_id=doc_id_1,
            status=ProcessingStatus.PENDING,
            task_id="task-1",
            created_at=base_time,
            started_at=None,
            completed_at=None,
            error_message=None
        ),
        ProcessingJob(
            document_id=doc_id_2,
            status=ProcessingStatus.PROCESSING,
            task_id="task-2",
            created_at=base_time,
            started_at=base_time,
            completed_at=None,
            error_message=None
        ),
        ProcessingJob(
            document_id=doc_id_1,
            status=ProcessingStatus.COMPLETED,
            task_id="task-3",
            created_at=base_time,
            started_at=base_time,
            completed_at=base_time,
            error_message=None
        )
    ]


class TestJobRepositoryInitialization:
    """Tests for repository initialization."""

    def test_i_can_initialize_repository(self):
        """Test that initialize() returns the repository itself with db and collection set."""
        # Arrange
        # Use the synchronous mongomock client, like the shared fixture does:
        # the repository is called synchronously throughout this module.
        client = MongoClient()
        db = client.test_database
        repo = JobRepository(db)

        # Act
        initialized_repo = repo.initialize()

        # Assert
        assert initialized_repo is repo
        assert repo.db is not None
        assert repo.collection is not None


class TestJobRepositoryCreation:
    """Tests for job creation functionality."""

    def test_i_can_create_job_with_task_id(self, in_memory_repository, sample_document_id, sample_task_id):
        """Test successful job creation with task ID."""
        # Act
        created_job = in_memory_repository.create_job(sample_document_id, sample_task_id)

        # Assert
        assert created_job is not None
        assert created_job.document_id == sample_document_id
        assert created_job.task_id == sample_task_id
        assert created_job.status == ProcessingStatus.PENDING
        assert created_job.created_at is not None
        assert created_job.started_at is None
        assert created_job.completed_at is None
        assert created_job.error_message is None
        assert created_job.id is not None
        assert isinstance(created_job.id, ObjectId)

    def test_i_can_create_job_without_task_id(self, in_memory_repository, sample_document_id):
        """Test successful job creation without task ID."""
        # Act
        created_job = in_memory_repository.create_job(sample_document_id)

        # Assert
        assert created_job is not None
        assert created_job.document_id == sample_document_id
        assert created_job.task_id is None
        assert created_job.status == ProcessingStatus.PENDING
        assert created_job.created_at is not None
        assert created_job.started_at is None
        assert created_job.completed_at is None
        assert created_job.error_message is None
        assert created_job.id is not None
        assert isinstance(created_job.id, ObjectId)

    def test_i_cannot_create_duplicate_job_for_document(self, in_memory_repository, sample_document_id,
                                                        sample_task_id):
        """Test that creating a second job for the same document_id raises JobRepositoryError."""
        # Arrange
        in_memory_repository.create_job(sample_document_id, sample_task_id)

        # Act & Assert
        with pytest.raises(JobRepositoryError) as exc_info:
            in_memory_repository.create_job(sample_document_id, "different-task-id")

        # Checked after the `with` block so the assertion actually runs.
        assert "create_job" in str(exc_info.value)

    def test_i_cannot_create_job_with_pymongo_error(self, in_memory_repository, sample_document_id, mocker):
        """Test handling of PyMongo errors during job creation."""
        # Arrange
        mocker.patch.object(in_memory_repository.collection, 'insert_one',
                            side_effect=PyMongoError("Database error"))

        # Act & Assert
        with pytest.raises(JobRepositoryError) as exc_info:
            in_memory_repository.create_job(sample_document_id)

        assert "create_job" in str(exc_info.value)


class TestJobRepositoryFinding:
    """Tests for job finding functionality."""

    def test_i_can_find_job_by_valid_id(self, in_memory_repository, sample_document_id, sample_task_id):
        """Test finding job by valid ObjectId."""
        # Arrange
        created_job = in_memory_repository.create_job(sample_document_id, sample_task_id)

        # Act
        found_job = in_memory_repository.find_job_by_id(created_job.id)

        # Assert
        assert found_job is not None
        assert found_job.id == created_job.id
        assert found_job.document_id == created_job.document_id
        assert found_job.task_id == created_job.task_id
        assert found_job.status == created_job.status

    def test_i_cannot_find_job_by_nonexistent_id(self, in_memory_repository):
        """Test that nonexistent ObjectId returns None."""
        # Arrange
        nonexistent_id = PyObjectId()

        # Act
        found_job = in_memory_repository.find_job_by_id(nonexistent_id)

        # Assert
        assert found_job is None

    def test_i_cannot_find_job_with_pymongo_error(self, in_memory_repository, mocker):
        """Test handling of PyMongo errors during job finding."""
        # Arrange
        mocker.patch.object(in_memory_repository.collection, 'find_one',
                            side_effect=PyMongoError("Database error"))

        # Act & Assert
        with pytest.raises(JobRepositoryError) as exc_info:
            in_memory_repository.find_job_by_id(PyObjectId())

        # NOTE(review): the expected operation name ("get_job_by_id") differs
        # from the method called here (find_job_by_id) - presumably the
        # repository reports it under that name; confirm against the repository.
        assert "get_job_by_id" in str(exc_info.value)

    def test_i_can_find_jobs_by_document_id(self, in_memory_repository, sample_document_id, sample_task_id):
        """Test finding jobs by document ID."""
        # Arrange
        created_job = in_memory_repository.create_job(sample_document_id, sample_task_id)

        # Act
        found_jobs = in_memory_repository.find_jobs_by_document_id(sample_document_id)

        # Assert
        assert len(found_jobs) == 1
        assert found_jobs[0].id == created_job.id
        assert found_jobs[0].document_id == sample_document_id

    def test_i_can_find_empty_jobs_list_for_nonexistent_document(self, in_memory_repository):
        """Test that nonexistent document ID returns empty list."""
        # Arrange
        nonexistent_id = ObjectId()

        # Act
        found_jobs = in_memory_repository.find_jobs_by_document_id(nonexistent_id)

        # Assert
        assert found_jobs == []

    def test_i_cannot_find_jobs_by_document_with_pymongo_error(self, in_memory_repository, mocker):
        """Test handling of PyMongo errors during finding jobs by document ID."""
        # Arrange
        mocker.patch.object(in_memory_repository.collection, 'find',
                            side_effect=PyMongoError("Database error"))

        # Act & Assert
        with pytest.raises(JobRepositoryError) as exc_info:
            in_memory_repository.find_jobs_by_document_id(PyObjectId())

        # NOTE(review): the expected operation name ("get_jobs_by_file_id")
        # differs from the method called here (find_jobs_by_document_id) -
        # confirm against the repository.
        assert "get_jobs_by_file_id" in str(exc_info.value)

    @pytest.mark.parametrize("status", [
        ProcessingStatus.PENDING,
        ProcessingStatus.PROCESSING,
        ProcessingStatus.COMPLETED
    ])
    def test_i_can_find_jobs_by_pending_status(self, in_memory_repository, sample_document_id, status):
        """Test finding jobs by status (parametrized over PENDING, PROCESSING and COMPLETED)."""
        # Arrange
        created_job = in_memory_repository.create_job(sample_document_id)
        in_memory_repository.update_job_status(created_job.id, status)

        # Act
        found_jobs = in_memory_repository.get_jobs_by_status(status)

        # Assert
        assert len(found_jobs) == 1
        assert found_jobs[0].id == created_job.id
        assert found_jobs[0].status == status

    def test_i_can_find_jobs_by_failed_status(self, in_memory_repository, sample_document_id):
        """Test finding jobs by FAILED status."""
        # Arrange
        created_job = in_memory_repository.create_job(sample_document_id)
        in_memory_repository.update_job_status(created_job.id, ProcessingStatus.FAILED, "Test error")

        # Act
        found_jobs = in_memory_repository.get_jobs_by_status(ProcessingStatus.FAILED)

        # Assert
        assert len(found_jobs) == 1
        assert found_jobs[0].id == created_job.id
        assert found_jobs[0].status == ProcessingStatus.FAILED
        assert found_jobs[0].error_message == "Test error"

    def test_i_can_find_empty_jobs_list_for_unused_status(self, in_memory_repository):
        """Test that unused status returns empty list."""
        # Act
        found_jobs = in_memory_repository.get_jobs_by_status(ProcessingStatus.COMPLETED)

        # Assert
        assert found_jobs == []

    def test_i_cannot_find_jobs_by_status_with_pymongo_error(self, in_memory_repository, mocker):
        """Test handling of PyMongo errors during finding jobs by status."""
        # Arrange
        mocker.patch.object(in_memory_repository.collection, 'find',
                            side_effect=PyMongoError("Database error"))

        # Act & Assert
        with pytest.raises(JobRepositoryError) as exc_info:
            in_memory_repository.get_jobs_by_status(ProcessingStatus.PENDING)

        assert "get_jobs_by_status" in str(exc_info.value)


class TestJobRepositoryStatusUpdate:
    """Tests for job status update functionality."""

    def test_i_can_update_job_status_to_processing(self, in_memory_repository, sample_document_id):
        """Test updating job status to PROCESSING with started_at timestamp."""
        # Arrange
        created_job = in_memory_repository.create_job(sample_document_id)

        # Act
        updated_job = in_memory_repository.update_job_status(created_job.id, ProcessingStatus.PROCESSING)

        # Assert
        assert updated_job is not None
        assert updated_job.id == created_job.id
        assert updated_job.status == ProcessingStatus.PROCESSING
        assert updated_job.started_at is not None
        assert updated_job.completed_at is None
        assert updated_job.error_message is None

    def test_i_can_update_job_status_to_completed(self, in_memory_repository, sample_document_id):
        """Test updating job status to COMPLETED with completed_at timestamp."""
        # Arrange
        created_job = in_memory_repository.create_job(sample_document_id)
        in_memory_repository.update_job_status(created_job.id, ProcessingStatus.PROCESSING)

        # Act
        updated_job = in_memory_repository.update_job_status(created_job.id, ProcessingStatus.COMPLETED)

        # Assert
        assert updated_job is not None
        assert updated_job.id == created_job.id
        assert updated_job.status == ProcessingStatus.COMPLETED
        assert updated_job.started_at is not None
        assert updated_job.completed_at is not None
        assert updated_job.error_message is None

    def test_i_can_update_job_status_to_failed_with_error(self, in_memory_repository, sample_document_id):
        """Test updating job status to FAILED with error message and completed_at timestamp."""
        # Arrange
        created_job = in_memory_repository.create_job(sample_document_id)
        error_message = "Processing failed due to invalid format"

        # Act
        updated_job = in_memory_repository.update_job_status(
            created_job.id, ProcessingStatus.FAILED, error_message
        )

        # Assert
        assert updated_job is not None
        assert updated_job.id == created_job.id
        assert updated_job.status == ProcessingStatus.FAILED
        assert updated_job.completed_at is not None
        assert updated_job.error_message == error_message

    def test_i_can_update_job_status_to_failed_without_error(self, in_memory_repository, sample_document_id):
        """Test updating job status to FAILED without error message."""
        # Arrange
        created_job = in_memory_repository.create_job(sample_document_id)

        # Act
        updated_job = in_memory_repository.update_job_status(created_job.id, ProcessingStatus.FAILED)

        # Assert
        assert updated_job is not None
        assert updated_job.id == created_job.id
        assert updated_job.status == ProcessingStatus.FAILED
        assert updated_job.completed_at is not None
        assert updated_job.error_message is None

    def test_i_cannot_update_nonexistent_job_status(self, in_memory_repository):
        """Test that updating nonexistent job returns None."""
        # Arrange
        nonexistent_id = ObjectId()

        # Act
        result = in_memory_repository.update_job_status(nonexistent_id, ProcessingStatus.COMPLETED)

        # Assert
        assert result is None

    def test_i_cannot_update_job_status_with_pymongo_error(self, in_memory_repository, sample_document_id, mocker):
        """Test handling of PyMongo errors during job status update."""
        # Arrange
        created_job = in_memory_repository.create_job(sample_document_id)
        mocker.patch.object(in_memory_repository.collection, 'find_one_and_update',
                            side_effect=PyMongoError("Database error"))

        # Act & Assert
        with pytest.raises(JobRepositoryError) as exc_info:
            in_memory_repository.update_job_status(created_job.id, ProcessingStatus.COMPLETED)

        assert "update_job_status" in str(exc_info.value)


class TestJobRepositoryDeletion:
    """Tests for job deletion functionality."""

    def test_i_can_delete_existing_job(self, in_memory_repository, sample_document_id):
        """Test successful job deletion."""
        # Arrange
        created_job = in_memory_repository.create_job(sample_document_id)

        # Act
        deletion_result = in_memory_repository.delete_job(created_job.id)

        # Assert
        assert deletion_result is True

        # Verify job is actually deleted
        found_job = in_memory_repository.find_job_by_id(created_job.id)
        assert found_job is None

    def test_i_cannot_delete_nonexistent_job(self, in_memory_repository):
        """Test that deleting nonexistent job returns False."""
        # Arrange
        nonexistent_id = ObjectId()

        # Act
        result = in_memory_repository.delete_job(nonexistent_id)

        # Assert
        assert result is False

    def test_i_cannot_delete_job_with_pymongo_error(self, in_memory_repository, sample_document_id, mocker):
        """Test handling of PyMongo errors during job deletion."""
        # Arrange
        created_job = in_memory_repository.create_job(sample_document_id)
        mocker.patch.object(in_memory_repository.collection, 'delete_one',
                            side_effect=PyMongoError("Database error"))

        # Act & Assert
        with pytest.raises(JobRepositoryError) as exc_info:
            in_memory_repository.delete_job(created_job.id)

        assert "delete_job" in str(exc_info.value)


class TestJobRepositoryComplexScenarios:
    """Tests for complex job repository scenarios."""

    def test_i_can_handle_complete_job_lifecycle(self, in_memory_repository, sample_document_id, sample_task_id):
        """Test complete job lifecycle from creation to completion."""
        # Create job
        job = in_memory_repository.create_job(sample_document_id, sample_task_id)
        assert job.status == ProcessingStatus.PENDING
        assert job.started_at is None
        assert job.completed_at is None

        # Start processing
        job = in_memory_repository.update_job_status(job.id, ProcessingStatus.PROCESSING)
        assert job.status == ProcessingStatus.PROCESSING
        assert job.started_at is not None
        assert job.completed_at is None

        # Complete job
        job = in_memory_repository.update_job_status(job.id, ProcessingStatus.COMPLETED)
        assert job.status == ProcessingStatus.COMPLETED
        assert job.started_at is not None
        assert job.completed_at is not None
        assert job.error_message is None

    def test_i_can_handle_job_failure_scenario(self, in_memory_repository, sample_document_id, sample_task_id):
        """Test job failure scenario with error message."""
        # Create and start job
        job = in_memory_repository.create_job(sample_document_id, sample_task_id)
        job = in_memory_repository.update_job_status(job.id, ProcessingStatus.PROCESSING)

        # Fail job with error
        error_msg = "File format not supported"
        job = in_memory_repository.update_job_status(job.id, ProcessingStatus.FAILED, error_msg)

        # Assert failure state
        assert job.status == ProcessingStatus.FAILED
        assert job.started_at is not None
        assert job.completed_at is not None
        assert job.error_message == error_msg

    def test_i_can_handle_multiple_documents_with_different_statuses(self, in_memory_repository):
        """Test managing multiple jobs for different documents with various statuses."""
        # Create jobs for different documents
        doc1 = PyObjectId()
        doc2 = PyObjectId()
        doc3 = PyObjectId()

        job1 = in_memory_repository.create_job(doc1, "task-1")
        job2 = in_memory_repository.create_job(doc2, "task-2")
        job3 = in_memory_repository.create_job(doc3, "task-3")

        # Update to different statuses
        in_memory_repository.update_job_status(job1.id, ProcessingStatus.PROCESSING)
        in_memory_repository.update_job_status(job2.id, ProcessingStatus.COMPLETED)
        in_memory_repository.update_job_status(job3.id, ProcessingStatus.FAILED, "Error occurred")

        # Verify status queries
        pending_jobs = in_memory_repository.get_jobs_by_status(ProcessingStatus.PENDING)
        processing_jobs = in_memory_repository.get_jobs_by_status(ProcessingStatus.PROCESSING)
        completed_jobs = in_memory_repository.get_jobs_by_status(ProcessingStatus.COMPLETED)
        failed_jobs = in_memory_repository.get_jobs_by_status(ProcessingStatus.FAILED)

        assert len(pending_jobs) == 0
        assert len(processing_jobs) == 1
        assert len(completed_jobs) == 1
        assert len(failed_jobs) == 1

        assert processing_jobs[0].id == job1.id
        assert completed_jobs[0].id == job2.id
        assert failed_jobs[0].id == job3.id