Implemented default pipeline

2025-09-26 22:08:39 +02:00
parent f1b551d243
commit 4de732b0ae
56 changed files with 4534 additions and 2837 deletions

View File

@@ -0,0 +1,570 @@
"""
Unit tests for DocumentService using an in-memory MongoDB.
Exercises the orchestration logic through real MongoDB operations,
with mongomock standing in for a live server to get closer to integration-level coverage.
"""
import os
from datetime import datetime
from unittest.mock import patch
import pytest
from bson import ObjectId
from mongomock.mongo_client import MongoClient
from app.models.document import FileType
from app.services.document_service import DocumentService
@pytest.fixture(autouse=True)
def cleanup_test_folder():
"""Remove the test objects folder before and after each test."""
import shutil
shutil.rmtree("test_folder", ignore_errors=True)
yield
shutil.rmtree("test_folder", ignore_errors=True)
@pytest.fixture
def in_memory_database():
"""Create an in-memory database for testing."""
client = MongoClient()
return client.test_database
@pytest.fixture
def document_service(in_memory_database):
"""Create DocumentService with in-memory repositories."""
service = DocumentService(in_memory_database, objects_folder="test_folder")
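# Note: the "test_folder" objects directory is wiped around each test by the
# autouse cleanup_test_folder fixture above.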
return service
@pytest.fixture
def sample_file_bytes():
"""Sample file content as bytes."""
return b"This is a test PDF content"
@pytest.fixture
def sample_text_bytes():
"""Sample text file content as bytes."""
return b"This is a test text file content"
@pytest.fixture
def sample_file_hash(sample_file_bytes):
"""Expected SHA256 hash of the sample file bytes."""
import hashlib
return hashlib.sha256(sample_file_bytes).hexdigest()
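# Helper: DocumentService is expected to store content on disk in a
# content-addressed layout, <objects_folder>/<first 24 hex chars of the SHA-256>/<full SHA-256>;
# the tests below rely on this path scheme.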
def validate_file_saved(document_service, file_hash, file_bytes):
# Verify file is saved to disk
target_file_path = os.path.join(document_service.objects_folder, file_hash[:24], file_hash)
assert os.path.exists(target_file_path)
with open(target_file_path, "rb") as f:
content = f.read()
assert content == file_bytes
class TestCreateDocument:
"""Tests for create_document method."""
@patch('app.services.document_service.magic.from_buffer')
@patch('app.services.document_service.datetime')
def test_i_can_create_document_with_new_content(
self,
mock_datetime,
mock_magic,
document_service,
sample_file_bytes
):
"""Test creating document when content doesn't exist yet."""
# Setup mocks
fixed_time = datetime(2025, 1, 1, 10, 30, 0)
mock_datetime.now.return_value = fixed_time
mock_magic.return_value = "application/pdf"
# Execute
result = document_service.create_document(
"/test/test.pdf",
sample_file_bytes,
"utf-8"
)
# Verify document creation
assert result is not None
assert result.filename == "test.pdf"
assert result.filepath == "/test/test.pdf"
assert result.file_type == FileType.PDF
assert result.detected_at == fixed_time
assert result.file_hash == document_service._calculate_file_hash(sample_file_bytes)
# Verify document created in database
doc_in_db = document_service.document_repository.find_document_by_id(result.id)
assert doc_in_db is not None
assert doc_in_db.id == result.id
assert doc_in_db.filename == result.filename
assert doc_in_db.filepath == result.filepath
assert doc_in_db.file_type == result.file_type
assert doc_in_db.detected_at == fixed_time
assert doc_in_db.file_hash == result.file_hash
# Verify file is saved to disk
validate_file_saved(document_service, result.file_hash, sample_file_bytes)
@patch('app.services.document_service.magic.from_buffer')
@patch('app.services.document_service.datetime')
def test_i_can_create_document_with_existing_content(
self,
mock_datetime,
mock_magic,
document_service,
sample_file_bytes
):
"""Test creating document when content already exists (deduplication)."""
# Setup mocks
fixed_time = datetime(2025, 1, 1, 10, 30, 0)
mock_datetime.now.return_value = fixed_time
mock_magic.return_value = "application/pdf"
# Create first document
first_doc = document_service.create_document(
"/test/first.pdf",
sample_file_bytes,
"utf-8"
)
# Create second document with same content
second_doc = document_service.create_document(
"/test/second.pdf",
sample_file_bytes,
"utf-8"
)
# Verify both documents exist but share same hash
assert first_doc.file_hash == second_doc.file_hash
assert first_doc.filename != second_doc.filename
assert first_doc.filepath != second_doc.filepath
def test_i_cannot_create_document_with_unsupported_file_type(
self,
document_service,
sample_file_bytes
):
"""Test that unsupported file types raise ValueError."""
with pytest.raises(ValueError, match="Unsupported file type"):
document_service.create_document(
"/test/test.xyz", # Unsupported extension
sample_file_bytes,
"utf-8"
)
def test_i_cannot_create_document_with_empty_file_path(
self,
document_service,
sample_file_bytes
):
"""Test that empty file path raises ValueError."""
with pytest.raises(ValueError):
document_service.create_document(
"", # Empty path
sample_file_bytes,
"utf-8"
)
@patch('app.services.document_service.magic.from_buffer')
def test_i_can_create_document_with_empty_bytes(
self,
mock_magic,
document_service
):
"""Test behavior with empty file bytes."""
# Setup
mock_magic.return_value = "text/plain"
# Execute with empty bytes
result = document_service.create_document(
"/test/empty.txt",
b"", # Empty bytes
"utf-8"
)
# Verify file is saved to disk
validate_file_saved(document_service, result.file_hash, b"")
class TestGetMethods:
"""Tests for document retrieval methods."""
@patch('app.services.document_service.magic.from_buffer')
def test_i_can_get_document_by_id(
self,
mock_magic,
document_service,
sample_file_bytes
):
"""Test retrieving document by ID."""
# Setup
mock_magic.return_value = "application/pdf"
# Create a document first
created_doc = document_service.create_document(
"/test/test.pdf",
sample_file_bytes,
"utf-8"
)
# Execute
result = document_service.get_document_by_id(created_doc.id)
# Verify
assert result is not None
assert result.id == created_doc.id
assert result.filename == created_doc.filename
@patch('app.services.document_service.magic.from_buffer')
def test_i_can_get_document_by_hash(
self,
mock_magic,
document_service,
sample_file_bytes
):
"""Test retrieving document by file hash."""
# Setup
mock_magic.return_value = "application/pdf"
# Create a document first
created_doc = document_service.create_document(
"/test/test.pdf",
sample_file_bytes,
"utf-8"
)
# Execute
result = document_service.get_document_by_hash(created_doc.file_hash)
# Verify
assert result is not None
assert result.file_hash == created_doc.file_hash
assert result.filename == created_doc.filename
@patch('app.services.document_service.magic.from_buffer')
def test_i_can_get_document_by_filepath(
self,
mock_magic,
document_service,
sample_file_bytes
):
"""Test retrieving document by file path."""
# Setup
mock_magic.return_value = "application/pdf"
test_path = "/test/unique_test.pdf"
# Create a document first
created_doc = document_service.create_document(
test_path,
sample_file_bytes,
"utf-8"
)
# Execute
result = document_service.get_document_by_filepath(test_path)
# Verify
assert result is not None
assert result.filepath == test_path
assert result.id == created_doc.id
@patch('app.services.document_service.magic.from_buffer')
def test_i_can_get_document_content(
self,
mock_magic,
document_service,
sample_file_bytes
):
"""Test retrieving document with associated content."""
# Setup
mock_magic.return_value = "application/pdf"
# Create a document first
created_doc = document_service.create_document(
"/test/test.pdf",
sample_file_bytes,
"utf-8"
)
# Execute
result = document_service.get_document_content_by_hash(created_doc.file_hash)
# Verify
assert result == sample_file_bytes
def test_i_cannot_get_nonexistent_document_by_id(
self,
document_service
):
"""Test that nonexistent document returns None."""
# Execute with random ObjectId
result = document_service.get_document_by_id(ObjectId())
# Verify
assert result is None
def test_i_cannot_get_nonexistent_document_by_hash(
self,
document_service
):
"""Test that nonexistent document hash returns None."""
# Execute
result = document_service.get_document_by_hash("nonexistent_hash")
# Verify
assert result is None
class TestPaginationAndCounting:
"""Tests for document listing and counting."""
@patch('app.services.document_service.magic.from_buffer')
def test_i_can_list_documents_with_pagination(
self,
mock_magic,
document_service,
sample_file_bytes
):
"""Test document listing with pagination parameters."""
# Setup
mock_magic.return_value = "application/pdf"
# Create multiple documents
for i in range(5):
document_service.create_document(
f"/test/test{i}.pdf",
sample_file_bytes + bytes(str(i), 'utf-8'), # Make each file unique
"utf-8"
)
# Execute with pagination
result = document_service.list_documents(skip=1, limit=2)
# Verify
assert len(result) == 2
# Test counting
total_count = document_service.count_documents()
assert total_count == 5
@patch('app.services.document_service.magic.from_buffer')
def test_i_can_count_documents(
self,
mock_magic,
document_service,
sample_file_bytes
):
"""Test document counting."""
# Setup
mock_magic.return_value = "text/plain"
# Initially should be 0
initial_count = document_service.count_documents()
assert initial_count == 0
# Create some documents
for i in range(3):
document_service.create_document(
f"/test/test{i}.txt",
sample_file_bytes + bytes(str(i), 'utf-8'),
"utf-8"
)
# Execute
final_count = document_service.count_documents()
# Verify
assert final_count == 3
class TestUpdateAndDelete:
"""Tests for document update and deletion operations."""
@patch('app.services.document_service.magic.from_buffer')
def test_i_can_update_document_metadata(
self,
mock_magic,
document_service,
sample_file_bytes
):
"""Test updating document metadata."""
# Setup
mock_magic.return_value = "application/pdf"
# Create a document first
created_doc = document_service.create_document(
"/test/test.pdf",
sample_file_bytes,
"utf-8"
)
# Execute update
update_data = {"metadata": {"page_count": 5}}
result = document_service.update_document(created_doc.id, update_data)
# Verify
assert result is not None
assert result.metadata.get("page_count") == 5
assert result.filename == created_doc.filename
assert result.filepath == created_doc.filepath
assert result.file_hash == created_doc.file_hash
assert result.file_type == created_doc.file_type
assert result.metadata == update_data['metadata']
def test_i_can_update_document_content(
self,
document_service,
sample_file_bytes
):
"""Test updating document content: the stored file and hash change, other fields are preserved."""
# Create a document first
created_doc = document_service.create_document(
"/test/test.pdf",
sample_file_bytes,
"utf-8"
)
# Execute update
update_data = {"file_bytes": b"this is an updated file content"}
result = document_service.update_document(created_doc.id, update_data)
assert result.filename == created_doc.filename
assert result.filepath == created_doc.filepath
assert result.file_hash != created_doc.file_hash
assert result.file_type == created_doc.file_type
assert result.metadata == created_doc.metadata
# Verify file is saved to disk
validate_file_saved(document_service, result.file_hash, b"this is an updated file content")
@patch('app.services.document_service.magic.from_buffer')
def test_i_can_delete_document_and_orphaned_content(
self,
mock_magic,
document_service,
sample_file_bytes
):
"""Test deleting document with orphaned content cleanup."""
# Setup
mock_magic.return_value = "application/pdf"
# Create a document
created_doc = document_service.create_document(
"/test/test.pdf",
sample_file_bytes,
"utf-8"
)
# Verify content exists
validate_file_saved(document_service, created_doc.file_hash, sample_file_bytes)
# Execute deletion
result = document_service.delete_document(created_doc.id)
# Verify document and content are deleted
assert result is True
deleted_doc = document_service.get_document_by_id(created_doc.id)
assert deleted_doc is None
# Validate content is deleted from disk
file_hash = created_doc.file_hash
target_file_path = os.path.join(document_service.objects_folder, file_hash[:24], file_hash)
assert not os.path.exists(target_file_path)
@patch('app.services.document_service.magic.from_buffer')
def test_i_can_delete_document_without_affecting_shared_content(
self,
mock_magic,
document_service,
sample_file_bytes
):
"""Test deleting document without removing shared content."""
# Setup
mock_magic.return_value = "application/pdf"
# Create two documents with same content
doc1 = document_service.create_document(
"/test/test1.pdf",
sample_file_bytes,
"utf-8"
)
doc2 = document_service.create_document(
"/test/test2.pdf",
sample_file_bytes,
"utf-8"
)
# They should share the same hash
assert doc1.file_hash == doc2.file_hash
# Delete first document
result = document_service.delete_document(doc1.id)
assert result is True
# Verify first document is deleted but content still exists
deleted_doc = document_service.get_document_by_id(doc1.id)
assert deleted_doc is None
remaining_doc = document_service.get_document_by_id(doc2.id)
assert remaining_doc is not None
validate_file_saved(document_service, doc2.file_hash, sample_file_bytes)
class TestHashCalculation:
"""Tests for file hash calculation utility."""
def test_i_can_calculate_consistent_file_hash(self, document_service):
"""Test that file hash calculation is consistent."""
test_bytes = b"Test content for hashing"
# Calculate hash multiple times
hash1 = document_service._calculate_file_hash(test_bytes)
hash2 = document_service._calculate_file_hash(test_bytes)
# Should be identical
assert hash1 == hash2
assert len(hash1) == 64 # SHA256 produces 64-character hex string
def test_i_get_different_hashes_for_different_content(self, document_service):
"""Test that different content produces different hashes."""
content1 = b"First content"
content2 = b"Second content"
hash1 = document_service._calculate_file_hash(content1)
hash2 = document_service._calculate_file_hash(content2)
assert hash1 != hash2
class TestFileTypeDetection:
"""Tests for file type detection."""
def test_i_can_detect_pdf_file_type(self, document_service):
"""Test PDF file type detection."""
file_type = document_service._detect_file_type("/path/to/document.pdf")
assert file_type == FileType.PDF
def test_i_can_detect_txt_file_type(self, document_service):
"""Test text file type detection."""
file_type = document_service._detect_file_type("/path/to/document.txt")
assert file_type == FileType.TXT
def test_i_can_detect_docx_file_type(self, document_service):
"""Test DOCX file type detection."""
file_type = document_service._detect_file_type("/path/to/document.docx")
assert file_type == FileType.DOCX
def test_i_cannot_detect_unsupported_file_type(self, document_service):
"""Test unsupported file type raises ValueError."""
with pytest.raises(ValueError, match="Unsupported file type"):
document_service._detect_file_type("/path/to/document.xyz")

View File

@@ -0,0 +1,518 @@
"""
Unit tests for JobService using an in-memory MongoDB.
Exercises the business logic through real MongoDB operations,
with mongomock standing in for a live server to get closer to integration-level coverage.
"""
import pytest
from bson import ObjectId
from mongomock.mongo_client import MongoClient
from app.exceptions.job_exceptions import InvalidStatusTransitionError
from app.models.job import ProcessingStatus
from app.models.types import PyObjectId
from app.services.job_service import JobService
@pytest.fixture
def in_memory_database():
"""Create an in-memory database for testing."""
client = MongoClient()
return client.test_database
@pytest.fixture
def job_service(in_memory_database):
"""Create JobService with in-memory repositories."""
service = JobService(in_memory_database).initialize()
return service
@pytest.fixture
def sample_document_id():
"""Sample file ObjectId."""
return PyObjectId()
@pytest.fixture
def sample_task_id():
"""Sample Celery task UUID."""
return "550e8400-e29b-41d4-a716-446655440000"
class TestCreateJob:
"""Tests for create_job method."""
def test_i_can_create_job_with_task_id(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test creating job with task ID."""
# Execute
result = job_service.create_job(sample_document_id, sample_task_id)
# Verify job creation
assert result is not None
assert result.document_id == sample_document_id
assert result.task_id == sample_task_id
assert result.status == ProcessingStatus.PENDING
assert result.created_at is not None
assert result.started_at is None
assert result.error_message is None
# Verify job exists in database
job_in_db = job_service.get_job_by_id(result.id)
assert job_in_db is not None
assert job_in_db.id == result.id
assert job_in_db.document_id == sample_document_id
assert job_in_db.task_id == sample_task_id
assert job_in_db.status == ProcessingStatus.PENDING
def test_i_can_create_job_without_task_id(
self,
job_service,
sample_document_id
):
"""Test creating job without task ID."""
# Execute
result = job_service.create_job(sample_document_id)
# Verify job creation
assert result is not None
assert result.document_id == sample_document_id
assert result.task_id is None
assert result.status == ProcessingStatus.PENDING
assert result.created_at is not None
assert result.started_at is None
assert result.error_message is None
class TestGetJobMethods:
"""Tests for job retrieval methods."""
def test_i_can_get_job_by_id(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test retrieving job by ID."""
# Create a job first
created_job = job_service.create_job(sample_document_id, sample_task_id)
# Execute
result = job_service.get_job_by_id(created_job.id)
# Verify
assert result is not None
assert result.id == created_job.id
assert result.document_id == created_job.document_id
assert result.task_id == created_job.task_id
assert result.status == created_job.status
def test_i_can_get_jobs_by_status(
self,
job_service,
sample_document_id
):
"""Test retrieving jobs by status."""
# Create jobs with different statuses
pending_job = job_service.create_job(sample_document_id, "pending-task")
processing_job = job_service.create_job(ObjectId(), "processing-task")
job_service.mark_job_as_started(processing_job.id)
completed_job = job_service.create_job(ObjectId(), "completed-task")
job_service.mark_job_as_started(completed_job.id)
job_service.mark_job_as_completed(completed_job.id)
# Execute - get pending jobs
pending_results = job_service.get_jobs_by_status(ProcessingStatus.PENDING)
# Verify
assert len(pending_results) == 1
assert pending_results[0].id == pending_job.id
assert pending_results[0].status == ProcessingStatus.PENDING
# Execute - get processing jobs
processing_results = job_service.get_jobs_by_status(ProcessingStatus.PROCESSING)
assert len(processing_results) == 1
assert processing_results[0].status == ProcessingStatus.PROCESSING
# Execute - get completed jobs
completed_results = job_service.get_jobs_by_status(ProcessingStatus.COMPLETED)
assert len(completed_results) == 1
assert completed_results[0].status == ProcessingStatus.COMPLETED
class TestUpdateStatus:
"""Tests for mark_job_as_started method."""
def test_i_can_mark_pending_job_as_started(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test marking pending job as started (PENDING → PROCESSING)."""
# Create a pending job
created_job = job_service.create_job(sample_document_id, sample_task_id)
assert created_job.status == ProcessingStatus.PENDING
# Execute
result = job_service.mark_job_as_started(created_job.id)
# Verify status transition
assert result is not None
assert result.id == created_job.id
assert result.status == ProcessingStatus.PROCESSING
# Verify in database
updated_job = job_service.get_job_by_id(created_job.id)
assert updated_job.status == ProcessingStatus.PROCESSING
def test_i_cannot_mark_processing_job_as_started(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test that processing job cannot be marked as started."""
# Create and start a job
created_job = job_service.create_job(sample_document_id, sample_task_id)
job_service.mark_job_as_started(created_job.id)
# Try to start it again
with pytest.raises(InvalidStatusTransitionError) as exc_info:
job_service.mark_job_as_started(created_job.id)
# Verify exception details
assert exc_info.value.current_status == ProcessingStatus.PROCESSING
assert exc_info.value.target_status == ProcessingStatus.PROCESSING
def test_i_cannot_mark_completed_job_as_started(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test that completed job cannot be marked as started."""
# Create, start, and complete a job
created_job = job_service.create_job(sample_document_id, sample_task_id)
job_service.mark_job_as_started(created_job.id)
job_service.mark_job_as_completed(created_job.id)
# Try to start it again
with pytest.raises(InvalidStatusTransitionError) as exc_info:
job_service.mark_job_as_started(created_job.id)
# Verify exception details
assert exc_info.value.current_status == ProcessingStatus.COMPLETED
assert exc_info.value.target_status == ProcessingStatus.PROCESSING
def test_i_cannot_mark_failed_job_as_started(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test that failed job cannot be marked as started."""
# Create, start, and fail a job
created_job = job_service.create_job(sample_document_id, sample_task_id)
job_service.mark_job_as_started(created_job.id)
job_service.mark_job_as_failed(created_job.id, "Test error")
# Try to start it again
with pytest.raises(InvalidStatusTransitionError) as exc_info:
job_service.mark_job_as_started(created_job.id)
# Verify exception details
assert exc_info.value.current_status == ProcessingStatus.FAILED
assert exc_info.value.target_status == ProcessingStatus.PROCESSING
def test_i_can_mark_processing_job_as_completed(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test marking processing job as completed (PROCESSING → COMPLETED)."""
# Create and start a job
created_job = job_service.create_job(sample_document_id, sample_task_id)
job_service.mark_job_as_started(created_job.id)
# Execute
result = job_service.mark_job_as_completed(created_job.id)
# Verify status transition
assert result is not None
assert result.id == created_job.id
assert result.status == ProcessingStatus.COMPLETED
# Verify in database
updated_job = job_service.get_job_by_id(created_job.id)
assert updated_job.status == ProcessingStatus.COMPLETED
def test_i_cannot_mark_pending_job_as_completed(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test that pending job cannot be marked as completed."""
# Create a pending job
created_job = job_service.create_job(sample_document_id, sample_task_id)
# Try to complete it directly
with pytest.raises(InvalidStatusTransitionError) as exc_info:
job_service.mark_job_as_completed(created_job.id)
# Verify exception details
assert exc_info.value.current_status == ProcessingStatus.PENDING
assert exc_info.value.target_status == ProcessingStatus.COMPLETED
def test_i_cannot_mark_completed_job_as_completed(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test that completed job cannot be marked as completed again."""
# Create, start, and complete a job
created_job = job_service.create_job(sample_document_id, sample_task_id)
job_service.mark_job_as_started(created_job.id)
job_service.mark_job_as_completed(created_job.id)
# Try to complete it again
with pytest.raises(InvalidStatusTransitionError) as exc_info:
job_service.mark_job_as_completed(created_job.id)
# Verify exception details
assert exc_info.value.current_status == ProcessingStatus.COMPLETED
assert exc_info.value.target_status == ProcessingStatus.COMPLETED
def test_i_cannot_mark_failed_job_as_completed(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test that failed job cannot be marked as completed."""
# Create, start, and fail a job
created_job = job_service.create_job(sample_document_id, sample_task_id)
job_service.mark_job_as_started(created_job.id)
job_service.mark_job_as_failed(created_job.id, "Test error")
# Try to complete it
with pytest.raises(InvalidStatusTransitionError) as exc_info:
job_service.mark_job_as_completed(created_job.id)
# Verify exception details
assert exc_info.value.current_status == ProcessingStatus.FAILED
assert exc_info.value.target_status == ProcessingStatus.COMPLETED
def test_i_can_mark_processing_job_as_failed_with_error_message(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test marking processing job as failed with error message."""
# Create and start a job
created_job = job_service.create_job(sample_document_id, sample_task_id)
job_service.mark_job_as_started(created_job.id)
error_message = "Processing failed due to invalid file format"
# Execute
result = job_service.mark_job_as_failed(created_job.id, error_message)
# Verify status transition
assert result is not None
assert result.id == created_job.id
assert result.status == ProcessingStatus.FAILED
assert result.error_message == error_message
# Verify in database
updated_job = job_service.get_job_by_id(created_job.id)
assert updated_job.status == ProcessingStatus.FAILED
assert updated_job.error_message == error_message
def test_i_can_mark_processing_job_as_failed_without_error_message(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test marking processing job as failed without error message."""
# Create and start a job
created_job = job_service.create_job(sample_document_id, sample_task_id)
job_service.mark_job_as_started(created_job.id)
# Execute without error message
result = job_service.mark_job_as_failed(created_job.id)
# Verify status transition
assert result is not None
assert result.status == ProcessingStatus.FAILED
assert result.error_message is None
def test_i_cannot_mark_pending_job_as_failed(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test that pending job cannot be marked as failed."""
# Create a pending job
created_job = job_service.create_job(sample_document_id, sample_task_id)
# Try to fail it directly
with pytest.raises(InvalidStatusTransitionError) as exc_info:
job_service.mark_job_as_failed(created_job.id, "Test error")
# Verify exception details
assert exc_info.value.current_status == ProcessingStatus.PENDING
assert exc_info.value.target_status == ProcessingStatus.FAILED
def test_i_cannot_mark_completed_job_as_failed(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test that completed job cannot be marked as failed."""
# Create, start, and complete a job
created_job = job_service.create_job(sample_document_id, sample_task_id)
job_service.mark_job_as_started(created_job.id)
job_service.mark_job_as_completed(created_job.id)
# Try to fail it
with pytest.raises(InvalidStatusTransitionError) as exc_info:
job_service.mark_job_as_failed(created_job.id, "Test error")
# Verify exception details
assert exc_info.value.current_status == ProcessingStatus.COMPLETED
assert exc_info.value.target_status == ProcessingStatus.FAILED
def test_i_cannot_mark_failed_job_as_failed(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test that failed job cannot be marked as failed again."""
# Create, start, and fail a job
created_job = job_service.create_job(sample_document_id, sample_task_id)
job_service.mark_job_as_started(created_job.id)
job_service.mark_job_as_failed(created_job.id, "First error")
# Try to fail it again
with pytest.raises(InvalidStatusTransitionError) as exc_info:
job_service.mark_job_as_failed(created_job.id, "Second error")
# Verify exception details
assert exc_info.value.current_status == ProcessingStatus.FAILED
assert exc_info.value.target_status == ProcessingStatus.FAILED
class TestDeleteJob:
"""Tests for delete_job method."""
def test_i_can_delete_existing_job(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test deleting an existing job."""
# Create a job
created_job = job_service.create_job(sample_document_id, sample_task_id)
# Verify job exists
job_before_delete = job_service.get_job_by_id(created_job.id)
assert job_before_delete is not None
# Execute deletion
result = job_service.delete_job(created_job.id)
# Verify deletion
assert result is True
# Verify job no longer exists
deleted_job = job_service.get_job_by_id(created_job.id)
assert deleted_job is None
def test_i_cannot_delete_nonexistent_job(
self,
job_service
):
"""Test deleting a nonexistent job returns False."""
# Execute deletion with random ObjectId
result = job_service.delete_job(ObjectId())
# Verify
assert result is False
class TestStatusTransitionValidation:
"""Tests for status transition validation across different scenarios."""
def test_valid_job_lifecycle_flow(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test complete valid job lifecycle: PENDING → PROCESSING → COMPLETED."""
# Create job (PENDING)
job = job_service.create_job(sample_document_id, sample_task_id)
assert job.status == ProcessingStatus.PENDING
# Start job (PENDING → PROCESSING)
started_job = job_service.mark_job_as_started(job.id)
assert started_job.status == ProcessingStatus.PROCESSING
# Complete job (PROCESSING → COMPLETED)
completed_job = job_service.mark_job_as_completed(job.id)
assert completed_job.status == ProcessingStatus.COMPLETED
def test_valid_job_failure_flow(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test valid job failure: PENDING → PROCESSING → FAILED."""
# Create job (PENDING)
job = job_service.create_job(sample_document_id, sample_task_id)
assert job.status == ProcessingStatus.PENDING
# Start job (PENDING → PROCESSING)
started_job = job_service.mark_job_as_started(job.id)
assert started_job.status == ProcessingStatus.PROCESSING
# Fail job (PROCESSING → FAILED)
failed_job = job_service.mark_job_as_failed(job.id, "Test failure")
assert failed_job.status == ProcessingStatus.FAILED
assert failed_job.error_message == "Test failure"
def test_job_operations_with_empty_database(
self,
job_service
):
"""Test job operations when database is empty."""
# Try to get nonexistent job
result = job_service.get_job_by_id(ObjectId())
assert result is None
# Try to get jobs by status when none exist
pending_jobs = job_service.get_jobs_by_status(ProcessingStatus.PENDING)
assert pending_jobs == []
# Try to delete nonexistent job
delete_result = job_service.delete_job(ObjectId())
assert delete_result is False