Implemented default pipeline

2025-09-26 22:08:39 +02:00
parent f1b551d243
commit 4de732b0ae
56 changed files with 4534 additions and 2837 deletions

View File

@@ -0,0 +1,570 @@
"""
Unit tests for DocumentService using an in-memory MongoDB.
Exercises the orchestration logic through real MongoDB operations,
with mongomock standing in for a live server to get closer to integration-level coverage.
"""
import os
from datetime import datetime
from unittest.mock import patch
import pytest
from bson import ObjectId
from mongomock.mongo_client import MongoClient
from app.models.document import FileType
from app.services.document_service import DocumentService
@pytest.fixture(autouse=True)
def cleanup_test_folder():
"""Remove the test objects folder before and after each test."""
import shutil
shutil.rmtree("test_folder", ignore_errors=True)
yield
shutil.rmtree("test_folder", ignore_errors=True)
@pytest.fixture
def in_memory_database():
"""Create an in-memory database for testing."""
client = MongoClient()
return client.test_database
@pytest.fixture
def document_service(in_memory_database):
"""Create DocumentService with in-memory repositories."""
service = DocumentService(in_memory_database, objects_folder="test_folder")
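# Note: the "test_folder" objects directory is wiped around each test by the
# autouse cleanup_test_folder fixture above.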
return service
@pytest.fixture
def sample_file_bytes():
"""Sample file content as bytes."""
return b"This is a test PDF content"
@pytest.fixture
def sample_text_bytes():
"""Sample text file content as bytes."""
return b"This is a test text file content"
@pytest.fixture
def sample_file_hash(sample_file_bytes):
"""Expected SHA256 hash of the sample file bytes."""
import hashlib
return hashlib.sha256(sample_file_bytes).hexdigest()
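# Helper: DocumentService is expected to store content on disk in a
# content-addressed layout, <objects_folder>/<first 24 hex chars of the SHA-256>/<full SHA-256>;
# the tests below rely on this path scheme.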
def validate_file_saved(document_service, file_hash, file_bytes):
# Verify file is saved to disk
target_file_path = os.path.join(document_service.objects_folder, file_hash[:24], file_hash)
assert os.path.exists(target_file_path)
with open(target_file_path, "rb") as f:
content = f.read()
assert content == file_bytes
class TestCreateDocument:
"""Tests for create_document method."""
@patch('app.services.document_service.magic.from_buffer')
@patch('app.services.document_service.datetime')
def test_i_can_create_document_with_new_content(
self,
mock_datetime,
mock_magic,
document_service,
sample_file_bytes
):
"""Test creating document when content doesn't exist yet."""
# Setup mocks
fixed_time = datetime(2025, 1, 1, 10, 30, 0)
mock_datetime.now.return_value = fixed_time
mock_magic.return_value = "application/pdf"
# Execute
result = document_service.create_document(
"/test/test.pdf",
sample_file_bytes,
"utf-8"
)
# Verify document creation
assert result is not None
assert result.filename == "test.pdf"
assert result.filepath == "/test/test.pdf"
assert result.file_type == FileType.PDF
assert result.detected_at == fixed_time
assert result.file_hash == document_service._calculate_file_hash(sample_file_bytes)
# Verify document created in database
doc_in_db = document_service.document_repository.find_document_by_id(result.id)
assert doc_in_db is not None
assert doc_in_db.id == result.id
assert doc_in_db.filename == result.filename
assert doc_in_db.filepath == result.filepath
assert doc_in_db.file_type == result.file_type
assert doc_in_db.detected_at == fixed_time
assert doc_in_db.file_hash == result.file_hash
# Verify file is saved to disk
validate_file_saved(document_service, result.file_hash, sample_file_bytes)
@patch('app.services.document_service.magic.from_buffer')
@patch('app.services.document_service.datetime')
def test_i_can_create_document_with_existing_content(
self,
mock_datetime,
mock_magic,
document_service,
sample_file_bytes
):
"""Test creating document when content already exists (deduplication)."""
# Setup mocks
fixed_time = datetime(2025, 1, 1, 10, 30, 0)
mock_datetime.now.return_value = fixed_time
mock_magic.return_value = "application/pdf"
# Create first document
first_doc = document_service.create_document(
"/test/first.pdf",
sample_file_bytes,
"utf-8"
)
# Create second document with same content
second_doc = document_service.create_document(
"/test/second.pdf",
sample_file_bytes,
"utf-8"
)
# Verify both documents exist but share same hash
assert first_doc.file_hash == second_doc.file_hash
assert first_doc.filename != second_doc.filename
assert first_doc.filepath != second_doc.filepath
def test_i_cannot_create_document_with_unsupported_file_type(
self,
document_service,
sample_file_bytes
):
"""Test that unsupported file types raise ValueError."""
with pytest.raises(ValueError, match="Unsupported file type"):
document_service.create_document(
"/test/test.xyz", # Unsupported extension
sample_file_bytes,
"utf-8"
)
def test_i_cannot_create_document_with_empty_file_path(
self,
document_service,
sample_file_bytes
):
"""Test that empty file path raises ValueError."""
with pytest.raises(ValueError):
document_service.create_document(
"", # Empty path
sample_file_bytes,
"utf-8"
)
@patch('app.services.document_service.magic.from_buffer')
def test_i_can_create_document_with_empty_bytes(
self,
mock_magic,
document_service
):
"""Test behavior with empty file bytes."""
# Setup
mock_magic.return_value = "text/plain"
# Execute with empty bytes
result = document_service.create_document(
"/test/empty.txt",
b"", # Empty bytes
"utf-8"
)
# Verify file is saved to disk
validate_file_saved(document_service, result.file_hash, b"")
class TestGetMethods:
"""Tests for document retrieval methods."""
@patch('app.services.document_service.magic.from_buffer')
def test_i_can_get_document_by_id(
self,
mock_magic,
document_service,
sample_file_bytes
):
"""Test retrieving document by ID."""
# Setup
mock_magic.return_value = "application/pdf"
# Create a document first
created_doc = document_service.create_document(
"/test/test.pdf",
sample_file_bytes,
"utf-8"
)
# Execute
result = document_service.get_document_by_id(created_doc.id)
# Verify
assert result is not None
assert result.id == created_doc.id
assert result.filename == created_doc.filename
@patch('app.services.document_service.magic.from_buffer')
def test_i_can_get_document_by_hash(
self,
mock_magic,
document_service,
sample_file_bytes
):
"""Test retrieving document by file hash."""
# Setup
mock_magic.return_value = "application/pdf"
# Create a document first
created_doc = document_service.create_document(
"/test/test.pdf",
sample_file_bytes,
"utf-8"
)
# Execute
result = document_service.get_document_by_hash(created_doc.file_hash)
# Verify
assert result is not None
assert result.file_hash == created_doc.file_hash
assert result.filename == created_doc.filename
@patch('app.services.document_service.magic.from_buffer')
def test_i_can_get_document_by_filepath(
self,
mock_magic,
document_service,
sample_file_bytes
):
"""Test retrieving document by file path."""
# Setup
mock_magic.return_value = "application/pdf"
test_path = "/test/unique_test.pdf"
# Create a document first
created_doc = document_service.create_document(
test_path,
sample_file_bytes,
"utf-8"
)
# Execute
result = document_service.get_document_by_filepath(test_path)
# Verify
assert result is not None
assert result.filepath == test_path
assert result.id == created_doc.id
@patch('app.services.document_service.magic.from_buffer')
def test_i_can_get_document_content(
self,
mock_magic,
document_service,
sample_file_bytes
):
"""Test retrieving document with associated content."""
# Setup
mock_magic.return_value = "application/pdf"
# Create a document first
created_doc = document_service.create_document(
"/test/test.pdf",
sample_file_bytes,
"utf-8"
)
# Execute
result = document_service.get_document_content_by_hash(created_doc.file_hash)
# Verify
assert result == sample_file_bytes
def test_i_cannot_get_nonexistent_document_by_id(
self,
document_service
):
"""Test that nonexistent document returns None."""
# Execute with random ObjectId
result = document_service.get_document_by_id(ObjectId())
# Verify
assert result is None
def test_i_cannot_get_nonexistent_document_by_hash(
self,
document_service
):
"""Test that nonexistent document hash returns None."""
# Execute
result = document_service.get_document_by_hash("nonexistent_hash")
# Verify
assert result is None
class TestPaginationAndCounting:
"""Tests for document listing and counting."""
@patch('app.services.document_service.magic.from_buffer')
def test_i_can_list_documents_with_pagination(
self,
mock_magic,
document_service,
sample_file_bytes
):
"""Test document listing with pagination parameters."""
# Setup
mock_magic.return_value = "application/pdf"
# Create multiple documents
for i in range(5):
document_service.create_document(
f"/test/test{i}.pdf",
sample_file_bytes + bytes(str(i), 'utf-8'), # Make each file unique
"utf-8"
)
# Execute with pagination
result = document_service.list_documents(skip=1, limit=2)
# Verify
assert len(result) == 2
# Test counting
total_count = document_service.count_documents()
assert total_count == 5
@patch('app.services.document_service.magic.from_buffer')
def test_i_can_count_documents(
self,
mock_magic,
document_service,
sample_file_bytes
):
"""Test document counting."""
# Setup
mock_magic.return_value = "text/plain"
# Initially should be 0
initial_count = document_service.count_documents()
assert initial_count == 0
# Create some documents
for i in range(3):
document_service.create_document(
f"/test/test{i}.txt",
sample_file_bytes + bytes(str(i), 'utf-8'),
"utf-8"
)
# Execute
final_count = document_service.count_documents()
# Verify
assert final_count == 3
class TestUpdateAndDelete:
"""Tests for document update and deletion operations."""
@patch('app.services.document_service.magic.from_buffer')
def test_i_can_update_document_metadata(
self,
mock_magic,
document_service,
sample_file_bytes
):
"""Test updating document metadata."""
# Setup
mock_magic.return_value = "application/pdf"
# Create a document first
created_doc = document_service.create_document(
"/test/test.pdf",
sample_file_bytes,
"utf-8"
)
# Execute update
update_data = {"metadata": {"page_count": 5}}
result = document_service.update_document(created_doc.id, update_data)
# Verify
assert result is not None
assert result.metadata.get("page_count") == 5
assert result.filename == created_doc.filename
assert result.filepath == created_doc.filepath
assert result.file_hash == created_doc.file_hash
assert result.file_type == created_doc.file_type
assert result.metadata == update_data['metadata']
def test_i_can_update_document_content(
self,
document_service,
sample_file_bytes
):
"""Test updating document content: the stored file and hash change, other fields are preserved."""
# Create a document first
created_doc = document_service.create_document(
"/test/test.pdf",
sample_file_bytes,
"utf-8"
)
# Execute update
update_data = {"file_bytes": b"this is an updated file content"}
result = document_service.update_document(created_doc.id, update_data)
assert result.filename == created_doc.filename
assert result.filepath == created_doc.filepath
assert result.file_hash != created_doc.file_hash
assert result.file_type == created_doc.file_type
assert result.metadata == created_doc.metadata
# Verify file is saved to disk
validate_file_saved(document_service, result.file_hash, b"this is an updated file content")
@patch('app.services.document_service.magic.from_buffer')
def test_i_can_delete_document_and_orphaned_content(
self,
mock_magic,
document_service,
sample_file_bytes
):
"""Test deleting document with orphaned content cleanup."""
# Setup
mock_magic.return_value = "application/pdf"
# Create a document
created_doc = document_service.create_document(
"/test/test.pdf",
sample_file_bytes,
"utf-8"
)
# Verify content exists
validate_file_saved(document_service, created_doc.file_hash, sample_file_bytes)
# Execute deletion
result = document_service.delete_document(created_doc.id)
# Verify document and content are deleted
assert result is True
deleted_doc = document_service.get_document_by_id(created_doc.id)
assert deleted_doc is None
# Validate content is deleted from disk
file_hash = created_doc.file_hash
target_file_path = os.path.join(document_service.objects_folder, file_hash[:24], file_hash)
assert not os.path.exists(target_file_path)
@patch('app.services.document_service.magic.from_buffer')
def test_i_can_delete_document_without_affecting_shared_content(
self,
mock_magic,
document_service,
sample_file_bytes
):
"""Test deleting document without removing shared content."""
# Setup
mock_magic.return_value = "application/pdf"
# Create two documents with same content
doc1 = document_service.create_document(
"/test/test1.pdf",
sample_file_bytes,
"utf-8"
)
doc2 = document_service.create_document(
"/test/test2.pdf",
sample_file_bytes,
"utf-8"
)
# They should share the same hash
assert doc1.file_hash == doc2.file_hash
# Delete first document
result = document_service.delete_document(doc1.id)
assert result is True
# Verify first document is deleted but content still exists
deleted_doc = document_service.get_document_by_id(doc1.id)
assert deleted_doc is None
remaining_doc = document_service.get_document_by_id(doc2.id)
assert remaining_doc is not None
validate_file_saved(document_service, doc2.file_hash, sample_file_bytes)
class TestHashCalculation:
"""Tests for file hash calculation utility."""
def test_i_can_calculate_consistent_file_hash(self, document_service):
"""Test that file hash calculation is consistent."""
test_bytes = b"Test content for hashing"
# Calculate hash multiple times
hash1 = document_service._calculate_file_hash(test_bytes)
hash2 = document_service._calculate_file_hash(test_bytes)
# Should be identical
assert hash1 == hash2
assert len(hash1) == 64 # SHA256 produces 64-character hex string
def test_i_get_different_hashes_for_different_content(self, document_service):
"""Test that different content produces different hashes."""
content1 = b"First content"
content2 = b"Second content"
hash1 = document_service._calculate_file_hash(content1)
hash2 = document_service._calculate_file_hash(content2)
assert hash1 != hash2
class TestFileTypeDetection:
"""Tests for file type detection."""
def test_i_can_detect_pdf_file_type(self, document_service):
"""Test PDF file type detection."""
file_type = document_service._detect_file_type("/path/to/document.pdf")
assert file_type == FileType.PDF
def test_i_can_detect_txt_file_type(self, document_service):
"""Test text file type detection."""
file_type = document_service._detect_file_type("/path/to/document.txt")
assert file_type == FileType.TXT
def test_i_can_detect_docx_file_type(self, document_service):
"""Test DOCX file type detection."""
file_type = document_service._detect_file_type("/path/to/document.docx")
assert file_type == FileType.DOCX
def test_i_cannot_detect_unsupported_file_type(self, document_service):
"""Test unsupported file type raises ValueError."""
with pytest.raises(ValueError, match="Unsupported file type"):
document_service._detect_file_type("/path/to/document.xyz")

View File

@@ -0,0 +1,518 @@
"""
Unit tests for JobService using an in-memory MongoDB.
Exercises the business logic through real MongoDB operations,
with mongomock standing in for a live server to get closer to integration-level coverage.
"""
import pytest
from bson import ObjectId
from mongomock.mongo_client import MongoClient
from app.exceptions.job_exceptions import InvalidStatusTransitionError
from app.models.job import ProcessingStatus
from app.models.types import PyObjectId
from app.services.job_service import JobService
@pytest.fixture
def in_memory_database():
"""Create an in-memory database for testing."""
client = MongoClient()
return client.test_database
@pytest.fixture
def job_service(in_memory_database):
"""Create JobService with in-memory repositories."""
service = JobService(in_memory_database).initialize()
return service
@pytest.fixture
def sample_document_id():
"""Sample file ObjectId."""
return PyObjectId()
@pytest.fixture
def sample_task_id():
"""Sample Celery task UUID."""
return "550e8400-e29b-41d4-a716-446655440000"
class TestCreateJob:
"""Tests for create_job method."""
def test_i_can_create_job_with_task_id(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test creating job with task ID."""
# Execute
result = job_service.create_job(sample_document_id, sample_task_id)
# Verify job creation
assert result is not None
assert result.document_id == sample_document_id
assert result.task_id == sample_task_id
assert result.status == ProcessingStatus.PENDING
assert result.created_at is not None
assert result.started_at is None
assert result.error_message is None
# Verify job exists in database
job_in_db = job_service.get_job_by_id(result.id)
assert job_in_db is not None
assert job_in_db.id == result.id
assert job_in_db.document_id == sample_document_id
assert job_in_db.task_id == sample_task_id
assert job_in_db.status == ProcessingStatus.PENDING
def test_i_can_create_job_without_task_id(
self,
job_service,
sample_document_id
):
"""Test creating job without task ID."""
# Execute
result = job_service.create_job(sample_document_id)
# Verify job creation
assert result is not None
assert result.document_id == sample_document_id
assert result.task_id is None
assert result.status == ProcessingStatus.PENDING
assert result.created_at is not None
assert result.started_at is None
assert result.error_message is None
class TestGetJobMethods:
"""Tests for job retrieval methods."""
def test_i_can_get_job_by_id(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test retrieving job by ID."""
# Create a job first
created_job = job_service.create_job(sample_document_id, sample_task_id)
# Execute
result = job_service.get_job_by_id(created_job.id)
# Verify
assert result is not None
assert result.id == created_job.id
assert result.document_id == created_job.document_id
assert result.task_id == created_job.task_id
assert result.status == created_job.status
def test_i_can_get_jobs_by_status(
self,
job_service,
sample_document_id
):
"""Test retrieving jobs by status."""
# Create jobs with different statuses
pending_job = job_service.create_job(sample_document_id, "pending-task")
processing_job = job_service.create_job(ObjectId(), "processing-task")
job_service.mark_job_as_started(processing_job.id)
completed_job = job_service.create_job(ObjectId(), "completed-task")
job_service.mark_job_as_started(completed_job.id)
job_service.mark_job_as_completed(completed_job.id)
# Execute - get pending jobs
pending_results = job_service.get_jobs_by_status(ProcessingStatus.PENDING)
# Verify
assert len(pending_results) == 1
assert pending_results[0].id == pending_job.id
assert pending_results[0].status == ProcessingStatus.PENDING
# Execute - get processing jobs
processing_results = job_service.get_jobs_by_status(ProcessingStatus.PROCESSING)
assert len(processing_results) == 1
assert processing_results[0].status == ProcessingStatus.PROCESSING
# Execute - get completed jobs
completed_results = job_service.get_jobs_by_status(ProcessingStatus.COMPLETED)
assert len(completed_results) == 1
assert completed_results[0].status == ProcessingStatus.COMPLETED
class TestUpdateStatus:
"""Tests for mark_job_as_started method."""
def test_i_can_mark_pending_job_as_started(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test marking pending job as started (PENDING → PROCESSING)."""
# Create a pending job
created_job = job_service.create_job(sample_document_id, sample_task_id)
assert created_job.status == ProcessingStatus.PENDING
# Execute
result = job_service.mark_job_as_started(created_job.id)
# Verify status transition
assert result is not None
assert result.id == created_job.id
assert result.status == ProcessingStatus.PROCESSING
# Verify in database
updated_job = job_service.get_job_by_id(created_job.id)
assert updated_job.status == ProcessingStatus.PROCESSING
def test_i_cannot_mark_processing_job_as_started(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test that processing job cannot be marked as started."""
# Create and start a job
created_job = job_service.create_job(sample_document_id, sample_task_id)
job_service.mark_job_as_started(created_job.id)
# Try to start it again
with pytest.raises(InvalidStatusTransitionError) as exc_info:
job_service.mark_job_as_started(created_job.id)
# Verify exception details
assert exc_info.value.current_status == ProcessingStatus.PROCESSING
assert exc_info.value.target_status == ProcessingStatus.PROCESSING
def test_i_cannot_mark_completed_job_as_started(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test that completed job cannot be marked as started."""
# Create, start, and complete a job
created_job = job_service.create_job(sample_document_id, sample_task_id)
job_service.mark_job_as_started(created_job.id)
job_service.mark_job_as_completed(created_job.id)
# Try to start it again
with pytest.raises(InvalidStatusTransitionError) as exc_info:
job_service.mark_job_as_started(created_job.id)
# Verify exception details
assert exc_info.value.current_status == ProcessingStatus.COMPLETED
assert exc_info.value.target_status == ProcessingStatus.PROCESSING
def test_i_cannot_mark_failed_job_as_started(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test that failed job cannot be marked as started."""
# Create, start, and fail a job
created_job = job_service.create_job(sample_document_id, sample_task_id)
job_service.mark_job_as_started(created_job.id)
job_service.mark_job_as_failed(created_job.id, "Test error")
# Try to start it again
with pytest.raises(InvalidStatusTransitionError) as exc_info:
job_service.mark_job_as_started(created_job.id)
# Verify exception details
assert exc_info.value.current_status == ProcessingStatus.FAILED
assert exc_info.value.target_status == ProcessingStatus.PROCESSING
def test_i_can_mark_processing_job_as_completed(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test marking processing job as completed (PROCESSING → COMPLETED)."""
# Create and start a job
created_job = job_service.create_job(sample_document_id, sample_task_id)
job_service.mark_job_as_started(created_job.id)
# Execute
result = job_service.mark_job_as_completed(created_job.id)
# Verify status transition
assert result is not None
assert result.id == created_job.id
assert result.status == ProcessingStatus.COMPLETED
# Verify in database
updated_job = job_service.get_job_by_id(created_job.id)
assert updated_job.status == ProcessingStatus.COMPLETED
def test_i_cannot_mark_pending_job_as_completed(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test that pending job cannot be marked as completed."""
# Create a pending job
created_job = job_service.create_job(sample_document_id, sample_task_id)
# Try to complete it directly
with pytest.raises(InvalidStatusTransitionError) as exc_info:
job_service.mark_job_as_completed(created_job.id)
# Verify exception details
assert exc_info.value.current_status == ProcessingStatus.PENDING
assert exc_info.value.target_status == ProcessingStatus.COMPLETED
def test_i_cannot_mark_completed_job_as_completed(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test that completed job cannot be marked as completed again."""
# Create, start, and complete a job
created_job = job_service.create_job(sample_document_id, sample_task_id)
job_service.mark_job_as_started(created_job.id)
job_service.mark_job_as_completed(created_job.id)
# Try to complete it again
with pytest.raises(InvalidStatusTransitionError) as exc_info:
job_service.mark_job_as_completed(created_job.id)
# Verify exception details
assert exc_info.value.current_status == ProcessingStatus.COMPLETED
assert exc_info.value.target_status == ProcessingStatus.COMPLETED
def test_i_cannot_mark_failed_job_as_completed(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test that failed job cannot be marked as completed."""
# Create, start, and fail a job
created_job = job_service.create_job(sample_document_id, sample_task_id)
job_service.mark_job_as_started(created_job.id)
job_service.mark_job_as_failed(created_job.id, "Test error")
# Try to complete it
with pytest.raises(InvalidStatusTransitionError) as exc_info:
job_service.mark_job_as_completed(created_job.id)
# Verify exception details
assert exc_info.value.current_status == ProcessingStatus.FAILED
assert exc_info.value.target_status == ProcessingStatus.COMPLETED
def test_i_can_mark_processing_job_as_failed_with_error_message(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test marking processing job as failed with error message."""
# Create and start a job
created_job = job_service.create_job(sample_document_id, sample_task_id)
job_service.mark_job_as_started(created_job.id)
error_message = "Processing failed due to invalid file format"
# Execute
result = job_service.mark_job_as_failed(created_job.id, error_message)
# Verify status transition
assert result is not None
assert result.id == created_job.id
assert result.status == ProcessingStatus.FAILED
assert result.error_message == error_message
# Verify in database
updated_job = job_service.get_job_by_id(created_job.id)
assert updated_job.status == ProcessingStatus.FAILED
assert updated_job.error_message == error_message
def test_i_can_mark_processing_job_as_failed_without_error_message(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test marking processing job as failed without error message."""
# Create and start a job
created_job = job_service.create_job(sample_document_id, sample_task_id)
job_service.mark_job_as_started(created_job.id)
# Execute without error message
result = job_service.mark_job_as_failed(created_job.id)
# Verify status transition
assert result is not None
assert result.status == ProcessingStatus.FAILED
assert result.error_message is None
def test_i_cannot_mark_pending_job_as_failed(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test that pending job cannot be marked as failed."""
# Create a pending job
created_job = job_service.create_job(sample_document_id, sample_task_id)
# Try to fail it directly
with pytest.raises(InvalidStatusTransitionError) as exc_info:
job_service.mark_job_as_failed(created_job.id, "Test error")
# Verify exception details
assert exc_info.value.current_status == ProcessingStatus.PENDING
assert exc_info.value.target_status == ProcessingStatus.FAILED
def test_i_cannot_mark_completed_job_as_failed(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test that completed job cannot be marked as failed."""
# Create, start, and complete a job
created_job = job_service.create_job(sample_document_id, sample_task_id)
job_service.mark_job_as_started(created_job.id)
job_service.mark_job_as_completed(created_job.id)
# Try to fail it
with pytest.raises(InvalidStatusTransitionError) as exc_info:
job_service.mark_job_as_failed(created_job.id, "Test error")
# Verify exception details
assert exc_info.value.current_status == ProcessingStatus.COMPLETED
assert exc_info.value.target_status == ProcessingStatus.FAILED
def test_i_cannot_mark_failed_job_as_failed(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test that failed job cannot be marked as failed again."""
# Create, start, and fail a job
created_job = job_service.create_job(sample_document_id, sample_task_id)
job_service.mark_job_as_started(created_job.id)
job_service.mark_job_as_failed(created_job.id, "First error")
# Try to fail it again
with pytest.raises(InvalidStatusTransitionError) as exc_info:
job_service.mark_job_as_failed(created_job.id, "Second error")
# Verify exception details
assert exc_info.value.current_status == ProcessingStatus.FAILED
assert exc_info.value.target_status == ProcessingStatus.FAILED
class TestDeleteJob:
"""Tests for delete_job method."""
def test_i_can_delete_existing_job(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test deleting an existing job."""
# Create a job
created_job = job_service.create_job(sample_document_id, sample_task_id)
# Verify job exists
job_before_delete = job_service.get_job_by_id(created_job.id)
assert job_before_delete is not None
# Execute deletion
result = job_service.delete_job(created_job.id)
# Verify deletion
assert result is True
# Verify job no longer exists
deleted_job = job_service.get_job_by_id(created_job.id)
assert deleted_job is None
def test_i_cannot_delete_nonexistent_job(
self,
job_service
):
"""Test deleting a nonexistent job returns False."""
# Execute deletion with random ObjectId
result = job_service.delete_job(ObjectId())
# Verify
assert result is False
class TestStatusTransitionValidation:
"""Tests for status transition validation across different scenarios."""
def test_valid_job_lifecycle_flow(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test complete valid job lifecycle: PENDING → PROCESSING → COMPLETED."""
# Create job (PENDING)
job = job_service.create_job(sample_document_id, sample_task_id)
assert job.status == ProcessingStatus.PENDING
# Start job (PENDING → PROCESSING)
started_job = job_service.mark_job_as_started(job.id)
assert started_job.status == ProcessingStatus.PROCESSING
# Complete job (PROCESSING → COMPLETED)
completed_job = job_service.mark_job_as_completed(job.id)
assert completed_job.status == ProcessingStatus.COMPLETED
def test_valid_job_failure_flow(
self,
job_service,
sample_document_id,
sample_task_id
):
"""Test valid job failure: PENDING → PROCESSING → FAILED."""
# Create job (PENDING)
job = job_service.create_job(sample_document_id, sample_task_id)
assert job.status == ProcessingStatus.PENDING
# Start job (PENDING → PROCESSING)
started_job = job_service.mark_job_as_started(job.id)
assert started_job.status == ProcessingStatus.PROCESSING
# Fail job (PROCESSING → FAILED)
failed_job = job_service.mark_job_as_failed(job.id, "Test failure")
assert failed_job.status == ProcessingStatus.FAILED
assert failed_job.error_message == "Test failure"
def test_job_operations_with_empty_database(
self,
job_service
):
"""Test job operations when database is empty."""
# Try to get nonexistent job
result = job_service.get_job_by_id(ObjectId())
assert result is None
# Try to get jobs by status when none exist
pending_jobs = job_service.get_jobs_by_status(ProcessingStatus.PENDING)
assert pending_jobs == []
# Try to delete nonexistent job
delete_result = job_service.delete_job(ObjectId())
assert delete_result is False