I can put a new file and create the associated PDF.

This commit is contained in:
2025-10-05 23:54:59 +02:00
parent bd52f2d296
commit 8ae9754fde
14 changed files with 376 additions and 45 deletions

View File

@@ -568,3 +568,137 @@ class TestFileTypeDetection:
"""Test unsupported file type raises ValueError."""
with pytest.raises(ValueError, match="Unsupported file type"):
document_service._detect_file_type("/path/to/document.xyz")
class TestCreatePdf:
    """Tests for the ``create_pdf`` method of the document service.

    The external PDF converter (``convert_to_pdf``) and the MIME sniffing
    call (``magic.from_buffer``) are patched in the tests that need them.
    """

    @patch('app.services.document_service.convert_to_pdf')
    @patch('app.services.document_service.magic.from_buffer')
    def test_i_can_create_pdf_successfully(
        self,
        mock_magic,
        mock_convert_to_pdf,
        document_service,
        sample_file_bytes
    ):
        """Test creating PDF from an existing document."""
        # Setup
        mock_magic.return_value = "text/plain"

        # Create a document first
        created_doc = document_service.create_document(
            "/test/test.txt",
            sample_file_bytes,
            "utf-8"
        )

        # Mock the PDF conversion: the patched converter only returns a path,
        # so the file it is supposed to produce is written by hand below.
        pdf_path = os.path.join(document_service.temp_folder, "converted.pdf")
        mock_convert_to_pdf.return_value = pdf_path

        # Write a sample PDF file that the conversion would create
        pdf_content = b"This is PDF content"
        os.makedirs(os.path.dirname(pdf_path), exist_ok=True)
        with open(pdf_path, "wb") as f:
            f.write(pdf_content)

        # Execute
        result = document_service.create_pdf(created_doc.id)

        # Verify
        assert result is True

        # Verify the PDF hash was recorded on the document
        updated_doc = document_service.get_document_by_id(created_doc.id)
        assert updated_doc.pdf_file_hash is not None
        pdf_hash = document_service._calculate_file_hash(pdf_content)
        assert updated_doc.pdf_file_hash == pdf_hash

        # Verify convert_to_pdf was called with correct arguments
        doc_path = document_service._get_document_path(created_doc.file_hash)
        mock_convert_to_pdf.assert_called_once_with(doc_path, document_service.temp_folder)

        # Verify content exists on disk
        validate_file_saved(document_service, pdf_hash, pdf_content)

    @patch('app.services.document_service.convert_to_pdf')
    @patch('app.services.document_service.magic.from_buffer')
    def test_i_can_reuse_existing_pdf(
        self,
        mock_magic,
        mock_convert_to_pdf,
        document_service,
        sample_file_bytes
    ):
        """Test that if PDF already exists, it doesn't recreate it."""
        # Setup
        mock_magic.return_value = "text/plain"

        # Create a document first
        created_doc = document_service.create_document(
            "/test/test.txt",
            sample_file_bytes,
            "utf-8"
        )

        # Create a fake PDF file and update the document
        pdf_content = b"This is PDF content"
        pdf_hash = document_service._calculate_file_hash(pdf_content)
        document_service.save_content_if_needed(pdf_hash, pdf_content)
        document_service.update_document(created_doc.id, {"pdf_file_hash": pdf_hash})

        # Execute
        result = document_service.create_pdf(created_doc.id)

        # Verify
        assert result is True

        # Verify convert_to_pdf was NOT called
        mock_convert_to_pdf.assert_not_called()

    def test_i_cannot_create_pdf_for_nonexistent_document(
        self,
        document_service
    ):
        """Test behavior when document ID doesn't exist."""
        # Execute with random ObjectId
        result = document_service.create_pdf(ObjectId())

        # Verify
        assert result is False

    @patch('app.services.document_service.magic.from_buffer')
    def test_i_cannot_create_pdf_when_file_content_missing(
        self,
        mock_magic,
        document_service,
        sample_file_bytes
    ):
        """Test behavior when file content doesn't exist."""
        # Setup
        mock_magic.return_value = "text/plain"

        # Create a document
        created_doc = document_service.create_document(
            "/test/test.txt",
            sample_file_bytes,
            "utf-8"
        )

        # Simulate missing content by removing file
        file_path = document_service._get_document_path(created_doc.file_hash)
        os.remove(file_path)

        # Execute
        result = document_service.create_pdf(created_doc.id)

        # Verify
        assert result is False

View File

@@ -417,6 +417,25 @@ class TestUpdateStatus:
# Verify exception details
assert exc_info.value.current_status == ProcessingStatus.FAILED
assert exc_info.value.target_status == ProcessingStatus.FAILED
def test_i_can_update_job_status(
    self,
    job_service,
    sample_document_id,
    sample_task_id
):
    """Test that a started job can be moved to another processing status."""
    # Create and start a job
    created_job = job_service.create_job(sample_document_id, sample_task_id)
    job_service.mark_job_as_started(created_job.id)

    # Execute
    result = job_service.update_job_status(created_job.id, ProcessingStatus.SAVING_OBJECT)

    # Verify status transition
    assert result is not None
    assert result.status == ProcessingStatus.SAVING_OBJECT
    assert result.error_message is None
class TestDeleteJob:

View File

@@ -4,7 +4,7 @@ from pathlib import Path
import pytest
from tasks.common.pdf_converter import TextToPdfConverter, ImageToPdfConverter, WordToPdfConverter
from app.utils.pdf_converter import TextToPdfConverter, ImageToPdfConverter, WordToPdfConverter
@pytest.fixture
@@ -20,10 +20,10 @@ def test_i_can_convert_text_to_pdf(temp_dir):
input_txt.write_text("Hello World!\nThis is a test.")
converter = TextToPdfConverter(str(input_txt), output_dir=temp_dir)
output_pdf = converter.convert()
converter.convert()
assert Path(output_pdf).exists()
assert output_pdf.endswith(".pdf")
assert Path(converter.output_path).exists()
assert str(converter.output_path).endswith(".pdf")
def test_i_can_convert_image_to_pdf(temp_dir):
@@ -34,10 +34,10 @@ def test_i_can_convert_image_to_pdf(temp_dir):
image.save(input_img)
converter = ImageToPdfConverter(str(input_img), output_dir=temp_dir)
output_pdf = converter.convert()
converter.convert()
assert Path(output_pdf).exists()
assert output_pdf.endswith(".pdf")
assert Path(converter.output_path).exists()
assert str(converter.output_path).endswith(".pdf")
def test_i_can_convert_word_to_pdf(temp_dir):
@@ -49,7 +49,7 @@ def test_i_can_convert_word_to_pdf(temp_dir):
doc.save(input_docx)
converter = WordToPdfConverter(str(input_docx), output_dir=temp_dir)
output_pdf = converter.convert()
converter.convert()
assert Path(output_pdf).exists()
assert output_pdf.endswith(".pdf")
assert Path(converter.output_path).exists()
assert str(converter.output_path).endswith(".pdf")