From 8ae9754fdeaa8e2dca247b62ee3f5f3ef8b274f9 Mon Sep 17 00:00:00 2001 From: Kodjo Sossouvi Date: Sun, 5 Oct 2025 23:54:59 +0200 Subject: [PATCH] I can put a new file and create the associated pdf --- docker-compose.yml | 4 + src/file-processor/app/config/settings.py | 10 ++ .../repositories/document_repository.py | 41 ++++++ src/file-processor/app/file_watcher.py | 3 +- src/file-processor/app/models/document.py | 1 + .../app/services/document_service.py | 120 +++++++++++++++- .../app/services/job_service.py | 11 +- .../app/utils}/pdf_converter.py | 0 src/worker/tasks/common/converter_utils.py | 7 + src/worker/tasks/document_processing.py | 40 +++--- src/worker/tasks/main.py | 11 +- tests/services/test_document_service.py | 134 ++++++++++++++++++ tests/services/test_job_service.py | 19 +++ tests/{common => utils}/test_pdf_converter.py | 20 +-- 14 files changed, 376 insertions(+), 45 deletions(-) rename src/{worker/tasks/common => file-processor/app/utils}/pdf_converter.py (100%) rename tests/{common => utils}/test_pdf_converter.py (66%) diff --git a/docker-compose.yml b/docker-compose.yml index a702201..170967b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -40,6 +40,8 @@ services: - ./src/worker/tasks:/app/tasks # <- Added: shared access to worker tasks - ./volumes/watched_files:/watched_files - ./volumes/objects:/objects + - ./volumes/errors:/errors + - ./volumes/ignored:/ignored depends_on: - redis - mongodb @@ -62,6 +64,8 @@ services: - ./src/file-processor/app:/app/app # <- Added: shared access file-processor app - ./volumes/watched_files:/watched_files - ./volumes/objects:/objects + - ./volumes/errors:/errors + - ./volumes/ignored:/ignored depends_on: - redis - mongodb diff --git a/src/file-processor/app/config/settings.py b/src/file-processor/app/config/settings.py index 94cf48a..ab29e0c 100644 --- a/src/file-processor/app/config/settings.py +++ b/src/file-processor/app/config/settings.py @@ -106,3 +106,13 @@ def get_watch_folder() -> str: def 
get_temp_folder() -> str: """Directory to store temporary files""" return os.getenv("TEMP_DIRECTORY", "/tmp") + + +def get_errors_folder() -> str: + """Directory to store files that failed processing""" + return os.getenv("ERRORS_DIRECTORY", "/errors") + + +def get_ignored_folder() -> str: + """Directory to store ignored (unsupported or duplicate) files""" + return os.getenv("IGNORED_DIRECTORY", "/ignored") diff --git a/src/file-processor/app/database/repositories/document_repository.py b/src/file-processor/app/database/repositories/document_repository.py index 0d8b6dc..e0e28c3 100644 --- a/src/file-processor/app/database/repositories/document_repository.py +++ b/src/file-processor/app/database/repositories/document_repository.py @@ -130,6 +130,47 @@ class FileDocumentRepository: except PyMongoError: return None + def find_document_with_pdf_hash(self, file_hash: str) -> Optional[FileDocument]: + """ + Find file document by file hash with a pdf_file_hash set (not None). + + Args: + file_hash (str): SHA256 hash of file content + + Returns: + FileDocument or None: File document if found, None otherwise + """ + try: + file_doc = self.collection.find_one({"file_hash": file_hash, + "pdf_file_hash": {"$ne": None}}) + if file_doc: + return FileDocument(**file_doc) + return None + + except PyMongoError: + return None + + def find_same_document(self, filename: str, file_hash: str) -> Optional[FileDocument]: + """ + Find document with the same file_name and the same file hash + + Args: + filename (str): Name of the file + file_hash (str): SHA256 hash of file content + + Returns: + FileDocument or None: File document if found, None otherwise + """ + try: + file_doc = self.collection.find_one({"file_hash": file_hash, + "filename": filename}) + if file_doc: + return FileDocument(**file_doc) + return None + + except PyMongoError: + return None + def find_document_by_filepath(self, filepath: str) -> Optional[FileDocument]: """ Find file document by exact filepath. 
diff --git a/src/file-processor/app/file_watcher.py b/src/file-processor/app/file_watcher.py index f3b9eb0..821dc59 100644 --- a/src/file-processor/app/file_watcher.py +++ b/src/file-processor/app/file_watcher.py @@ -30,7 +30,7 @@ class DocumentFileEventHandler(FileSystemEventHandler): dispatching Celery tasks, and managing processing jobs. """ - SUPPORTED_EXTENSIONS = {'.txt', '.pdf', '.docx'} + SUPPORTED_EXTENSIONS = {'.txt', '.pdf', '.docx', '.jpg', '.png', '.jpeg'} def __init__(self, document_service: DocumentService, job_service: JobService): """ @@ -59,6 +59,7 @@ class DocumentFileEventHandler(FileSystemEventHandler): if file_extension not in self.SUPPORTED_EXTENSIONS: logger.info(f"Ignoring unsupported file type: {filepath}") + self.document_service.move_to_ignored(filepath, "unsupported file type") return logger.info(f"Processing new file: {filepath}") diff --git a/src/file-processor/app/models/document.py b/src/file-processor/app/models/document.py index 19d9bfe..105bffd 100644 --- a/src/file-processor/app/models/document.py +++ b/src/file-processor/app/models/document.py @@ -49,6 +49,7 @@ class FileDocument(BaseModel): metadata: Dict[str, Any] = Field(default_factory=dict, description="File-specific metadata") detected_at: Optional[datetime] = Field(default=None, description="Timestamp when file was detected") file_hash: Optional[str] = Field(default=None, description="SHA256 hash of file content") + pdf_file_hash: Optional[str] = Field(default=None, description="SHA256 hash of the associated pdf file content") encoding: str = Field(default="utf-8", description="Character encoding for text files") file_size: int = Field(..., ge=0, description="File size in bytes") mime_type: str = Field(..., description="MIME type detected") diff --git a/src/file-processor/app/services/document_service.py b/src/file-processor/app/services/document_service.py index 9b30c8e..f20dc90 100644 --- a/src/file-processor/app/services/document_service.py +++ 
b/src/file-processor/app/services/document_service.py @@ -6,7 +6,9 @@ while maintaining data consistency through MongoDB transactions. """ import hashlib +import logging import os +import shutil from datetime import datetime from pathlib import Path from typing import List, Optional, Dict, Any @@ -14,13 +16,16 @@ from typing import List, Optional, Dict, Any import magic from pymongo.errors import PyMongoError -from app.config.settings import get_objects_folder +from app.config.settings import get_objects_folder, get_temp_folder, get_errors_folder, get_ignored_folder from app.database.repositories.document_repository import FileDocumentRepository from app.models.document import ( FileDocument, FileType, ) from app.models.types import PyObjectId +from app.utils.pdf_converter import convert_to_pdf + +logger = logging.getLogger(__name__) class DocumentService: @@ -31,7 +36,11 @@ class DocumentService: and their content while ensuring data consistency through transactions. """ - def __init__(self, database, objects_folder: str = None): + def __init__(self, database, + objects_folder: str = None, + temp_folder: str = None, + errors_folder: str = None, + ignored_folder: str = None): """ Initialize the document service with repository dependencies. @@ -43,6 +52,9 @@ class DocumentService: self.db = database self.document_repository = FileDocumentRepository(self.db) self.objects_folder = objects_folder or get_objects_folder() + self.temp_folder = temp_folder or get_temp_folder() + self.errors_folder = errors_folder or get_errors_folder() + self.ignored_folder = ignored_folder or get_ignored_folder() def initialize(self): self.document_repository.initialize() @@ -117,6 +129,39 @@ class DocumentService: return path.read_bytes() + @staticmethod + def _get_safe_path(file_path): + """ + If the path already exists, add a suffix to the filename. + Increment the suffix until a safe path is found. 
+ :param file_path: + :return: + """ + path = Path(file_path) + + # If the path doesn't exist, return it as is + if not path.exists(): + return file_path + + # Split the filename and extension + stem = path.stem + suffix = path.suffix + directory = path.parent + + # Try incrementing numbers until a unique path is found + counter = 1 + while True: + # Create new filename with counter + new_filename = f"{stem}_{counter}{suffix}" + new_path = os.path.join(directory, new_filename) + + # Check if this new path exists + if not os.path.exists(new_path): + return new_path + + # Increment counter for next attempt + counter += 1 + def _get_document_path(self, file_hash): """ @@ -125,6 +170,9 @@ class DocumentService: """ return os.path.join(self.objects_folder, file_hash[:24], file_hash) + def exists(self, file_hash): + return os.path.exists(self._get_document_path(file_hash)) + def save_content_if_needed(self, file_hash, content: bytes): target_path = self._get_document_path(file_hash) if os.path.exists(target_path): @@ -136,6 +184,18 @@ class DocumentService: with open(target_path, "wb") as f: f.write(content) + def move_to_errors(self, document_id, file_path): + logger.info(f"Moving file {file_path} to error folder") + error_file_name = f"{document_id}_{os.path.basename(file_path)}" + error_file_path = self._get_safe_path(os.path.join(self.errors_folder, error_file_name)) + shutil.move(file_path, error_file_path) + + def move_to_ignored(self, file_path, reason="Unknown"): + logger.info(f"Moving file {file_path} to ignored folder") + ignored_file_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + f"_### {reason} ###_" + os.path.basename(file_path) + ignored_file_path = self._get_safe_path(os.path.join(self.ignored_folder, ignored_file_name)) + shutil.move(file_path, ignored_file_path) + def create_document( self, file_path: str, @@ -171,7 +231,15 @@ class DocumentService: detected_at = datetime.now() try: + logger.info(f"Creating Document for {file_path}") + # Skip the 
document if it already exists + same_document = self.document_repository.find_same_document(filename, file_hash) + if same_document is not None: + logger.info(f" Document with same hash already exists. Skipping...") + self.move_to_ignored(file_path, f"already exists ({same_document.id})") + # NOTE(review): return here, otherwise the already-moved file's content is re-saved and a duplicate record is inserted below + return same_document + self.save_content_if_needed(file_hash, file_bytes) + logger.info(f" Saved content to {self._get_document_path(file_hash)}") # Create FileDocument file_data = FileDocument( @@ -188,6 +256,7 @@ ) created_file = self.document_repository.create_document(file_data) + logger.info(f" Created document with id '{created_file.id}'") return created_file @@ -195,6 +264,50 @@ # Transaction will automatically rollback if supported raise PyMongoError(f"Failed to create document: {str(e)}") + def create_pdf(self, document_id: PyObjectId): + """ + For all files, a controlled pdf version will be created for standard visualization and action + :return: + """ + logger.info(f"Creating PDF document for {document_id}") + document = self.get_document_by_id(document_id) + if document is None: + logger.error(f" Document not found") + raise ValueError(f"Document {document_id} not found") + + # try to find another document that has the same hash + document_with_same_hash = self.get_document_with_pdf_hash(document.file_hash) + + # the pdf will be created only if it does not exist yet + if (document_with_same_hash is not None and + document_with_same_hash.pdf_file_hash and + self.exists(document_with_same_hash.pdf_file_hash)): + logger.info(f"Found document with same hash. Will use pdf {document_with_same_hash.pdf_file_hash}") + self.update_document(document_id, {"pdf_file_hash": document_with_same_hash.pdf_file_hash}) + return True + + # get the content of the file + logger.info(f" No document with same hash found and valid pdf found. 
Will create new pdf") + file_bytes = self.get_document_content_by_hash(document.file_hash) + if file_bytes is None: + logger.error(f"Content for document {document_id} not found. hash = {document.file_hash}.") + return False + + # create the pdf file + temp_pdf_file = convert_to_pdf(self._get_document_path(document.file_hash), self.temp_folder) + pdf_file_hash = self._calculate_file_hash(self._read_file_bytes(temp_pdf_file)) + self.save_content_if_needed(pdf_file_hash, self._read_file_bytes(temp_pdf_file)) + logger.info(f" Created new pdf file with hash {pdf_file_hash}") + + # remove the temporary file + os.remove(temp_pdf_file) + logger.info(f" Removed temporary pdf file {temp_pdf_file}") + + # update the document + self.update_document(document_id, {"pdf_file_hash": pdf_file_hash}) + + return True + def get_document_by_id(self, document_id: PyObjectId) -> Optional[FileDocument]: """ Retrieve a document by its ID. @@ -219,6 +332,9 @@ class DocumentService: """ return self.document_repository.find_document_by_hash(file_hash) + def get_document_with_pdf_hash(self, file_hash) -> Optional[FileDocument]: + return self.document_repository.find_document_with_pdf_hash(file_hash) + def get_document_by_filepath(self, filepath: str) -> Optional[FileDocument]: """ Retrieve a document by its file path. 
diff --git a/src/file-processor/app/services/job_service.py b/src/file-processor/app/services/job_service.py index ff55c6d..d7b4bc1 100644 --- a/src/file-processor/app/services/job_service.py +++ b/src/file-processor/app/services/job_service.py @@ -111,7 +111,9 @@ class JobService: current_job = self.repository.find_job_by_id(job_id) # Validate status transition - if current_job.status != ProcessingStatus.PROCESSING: + if current_job.status in (ProcessingStatus.PENDING, + ProcessingStatus.COMPLETED, + ProcessingStatus.FAILED): raise InvalidStatusTransitionError(current_job.status, ProcessingStatus.COMPLETED) # Update status @@ -141,7 +143,7 @@ class JobService: current_job = self.repository.find_job_by_id(job_id) # Validate status transition - if current_job.status != ProcessingStatus.PROCESSING: + if current_job.status in (ProcessingStatus.PENDING, ProcessingStatus.COMPLETED, ProcessingStatus.FAILED): raise InvalidStatusTransitionError(current_job.status, ProcessingStatus.FAILED) # Update status with error message @@ -151,6 +153,11 @@ class JobService: error_message ) + def update_job_status(self, job_id: PyObjectId, + status: ProcessingStatus, + error_message: str = None) -> ProcessingJob: + return self.repository.update_job_status(job_id, status, error_message) + def delete_job(self, job_id: PyObjectId) -> bool: """ Delete a job from the database. diff --git a/src/worker/tasks/common/pdf_converter.py b/src/file-processor/app/utils/pdf_converter.py similarity index 100% rename from src/worker/tasks/common/pdf_converter.py rename to src/file-processor/app/utils/pdf_converter.py diff --git a/src/worker/tasks/common/converter_utils.py b/src/worker/tasks/common/converter_utils.py index 61f65b2..e85e300 100644 --- a/src/worker/tasks/common/converter_utils.py +++ b/src/worker/tasks/common/converter_utils.py @@ -20,12 +20,19 @@ def detect_file_type(file_path: str) -> str: UnsupportedFileTypeError: If file type is not supported. 
""" mime = magic.from_file(file_path, mime=True) + extension = Path(file_path).suffix if mime.startswith("text/"): return "text" elif mime.startswith("image/"): return "image" elif mime in ("application/vnd.openxmlformats-officedocument.wordprocessingml.document",): return "word" + elif mime == "application/pdf": + return "pdf" + elif mime == "application/vnd.ms-powerpoint": + return "powerpoint" + elif mime == "application/octet-stream" and extension in (".jpg", ".jpeg", ".png", ".gif"): + return "image" else: raise UnsupportedFileTypeError(f"Unsupported file type: {mime}") diff --git a/src/worker/tasks/document_processing.py b/src/worker/tasks/document_processing.py index 6524901..b19d5fd 100644 --- a/src/worker/tasks/document_processing.py +++ b/src/worker/tasks/document_processing.py @@ -6,14 +6,14 @@ and update processing job statuses throughout the task lifecycle. """ import logging +import os from typing import Any, Dict from app.config import settings from app.database.connection import get_database +from app.models.job import ProcessingStatus from app.services.document_service import DocumentService from app.services.job_service import JobService -from tasks.common.document_utils import save_as_object -from tasks.common.pdf_converter import convert_to_pdf from tasks.main import celery_app logger = logging.getLogger(__name__) @@ -26,7 +26,8 @@ def get_services(): return document_service, job_service -@celery_app.task(bind=True, autoretry_for=(Exception,), retry_kwargs={'max_retries': 3, 'countdown': 60}) +#@celery_app.task(bind=True, autoretry_for=(Exception,), retry_kwargs={'max_retries': 3, 'countdown': 60}) +@celery_app.task(bind=True) def process_document(self, filepath: str) -> Dict[str, Any]: """ Process a document file and extract its content. 
@@ -46,41 +47,31 @@ def process_document(self, filepath: str) -> Dict[str, Any]: Raises: Exception: Any processing error (will trigger retry) """ - task_id = self.request.id + task_id = self.request.id logger.info(f"Starting document processing task {task_id} for file: {filepath}") # get services document_service, job_service = get_services() job = None + document = None try: # Step 1: Create the document and a new job record for the document document = document_service.create_document(filepath) job = job_service.create_job(task_id=task_id, document_id=document.id) job_service.mark_job_as_started(job_id=job.id) - logger.info(f"Task {task_id} created for document {document.id} with file path: {filepath} and job id: {job.id}") + logger.info(f"Task {task_id} created for document {document.id} from file path: {filepath} and job id: {job.id}") - logger.info(f"Job {task_id} marked as PROCESSING") - - raw_file_hash = save_as_object(filepath) - logger.info(f"Job {task_id} saved document as object: {raw_file_hash}") + logger.info(f"Task {task_id} : Creating associated PDF") + job_service.update_job_status(job_id=job.id, status=ProcessingStatus.SAVING_PDF) + document_service.create_pdf(document.id) - # Step 4: Create the pdf version of the document - pdf_file_hash = convert_to_pdf(filepath, raw_file_hash) - logger.info(f"Job {task_id} saved PDF with hash: {pdf_file_hash}") - - - - # Step 3: Mark job as started - - # Step 4: Create the pdf version of the document - pdf_file_path = convert_to_pdf(filepath, settings.get_temp_folder()) - digest = save_as_object(pdf_file_path) - logger.info(f"Job {task_id} internal PDF file created: {digest}") + # remove the file from the watch folder + os.remove(filepath) # Step x: Mark job as completed job_service.mark_job_as_completed(job_id=job.id) - logger.info(f"Job {task_id} marked as COMPLETED") + logger.info(f"Task {task_id} marked as COMPLETED") return { "task_id": task_id, @@ -99,6 +90,11 @@ def process_document(self, filepath: str) -> 
Dict[str, Any]: logger.info(f"Job {task_id} marked as FAILED") else: logger.error(f"Failed to process {filepath}. error = {str(e)}") + + if document is not None: + document_service.move_to_errors(document.id, filepath) + logger.info(f"Moved file {filepath} to errors/{document.id}") + except Exception as job_error: logger.error(f"Failed to update job status for task {task_id}: {str(job_error)}") diff --git a/src/worker/tasks/main.py b/src/worker/tasks/main.py index ada5228..97f4661 100644 --- a/src/worker/tasks/main.py +++ b/src/worker/tasks/main.py @@ -41,15 +41,10 @@ celery_app.conf.update( def global_init(**kwargs): """Initialize global variables.""" - logger.info(f"{'*' * 20}") + logger.info(f"{'*' * 45}") logger.info(f"{'--' * 5}" + " Starting MyDocManager worker " + f"{'--' * 5}") - logger.info(f"{'*' * 20}") - tmp_folder = settings.get_temp_folder() - if not os.path.exists(tmp_folder): - logger.info(f"Creating temporary folder: {tmp_folder}") - os.makedirs(tmp_folder) - else: - logger.info(f"Temporary folder already exists: {os.path.abspath(tmp_folder)}") + logger.info(f"{'*' * 45}") + global_init() diff --git a/tests/services/test_document_service.py b/tests/services/test_document_service.py index 5ca9867..80fb157 100644 --- a/tests/services/test_document_service.py +++ b/tests/services/test_document_service.py @@ -568,3 +568,137 @@ class TestFileTypeDetection: """Test unsupported file type raises ValueError.""" with pytest.raises(ValueError, match="Unsupported file type"): document_service._detect_file_type("/path/to/document.xyz") + + +class TestCreatePdf: + """Tests for create_pdf method.""" + + @patch('app.services.document_service.convert_to_pdf') + @patch('app.services.document_service.magic.from_buffer') + def test_i_can_create_pdf_successfully( + self, + mock_magic, + mock_convert_to_pdf, + document_service, + sample_file_bytes + ): + """Test creating PDF from an existing document.""" + # Setup + mock_magic.return_value = "text/plain" + + # Create a 
document first + created_doc = document_service.create_document( + "/test/test.txt", + sample_file_bytes, + "utf-8" + ) + + # Mock the PDF conversion + pdf_path = os.path.join(document_service.temp_folder, "converted.pdf") + mock_convert_to_pdf.return_value = pdf_path + + # Write a sample PDF file that the conversion would create + pdf_content = b"This is PDF content" + os.makedirs(os.path.dirname(pdf_path), exist_ok=True) + with open(pdf_path, "wb") as f: + f.write(pdf_content) + + # Execute + result = document_service.create_pdf(created_doc.id) + + # Verify + assert result is True + + # Get the updated document + updated_doc = document_service.get_document_by_id(created_doc.id) + assert updated_doc.pdf_file_hash is not None + + # Verify the PDF content was saved + pdf_hash = document_service._calculate_file_hash(pdf_content) + assert updated_doc.pdf_file_hash == pdf_hash + + # Verify convert_to_pdf was called with correct arguments + doc_path = document_service._get_document_path(created_doc.file_hash) + mock_convert_to_pdf.assert_called_once_with(doc_path, document_service.temp_folder) + + # Verify content exists on disk + validate_file_saved(document_service, pdf_hash, pdf_content) + + # Verify PDF hash was added to document + updated_doc = document_service.get_document_by_id(created_doc.id) + pdf_hash = document_service._calculate_file_hash(pdf_content) + assert updated_doc.pdf_file_hash == pdf_hash + + @patch('app.services.document_service.convert_to_pdf') + @patch('app.services.document_service.magic.from_buffer') + def test_i_can_reuse_existing_pdf( + self, + mock_magic, + mock_convert_to_pdf, + document_service, + sample_file_bytes + ): + """Test that if PDF already exists, it doesn't recreate it.""" + # Setup + mock_magic.return_value = "text/plain" + + # Create a document first + created_doc = document_service.create_document( + "/test/test.txt", + sample_file_bytes, + "utf-8" + ) + + # Create a fake PDF file and update the document + pdf_content = 
b"This is PDF content" + pdf_hash = document_service._calculate_file_hash(pdf_content) + document_service.save_content_if_needed(pdf_hash, pdf_content) + document_service.update_document(created_doc.id, {"pdf_file_hash": pdf_hash}) + + # Execute + result = document_service.create_pdf(created_doc.id) + + # Verify + assert result is True + + # Verify convert_to_pdf was NOT called + mock_convert_to_pdf.assert_not_called() + + def test_i_cannot_create_pdf_for_nonexistent_document( + self, + document_service + ): + """Test behavior when document ID doesn't exist.""" + # Execute with random ObjectId: create_pdf raises ValueError for unknown documents + with pytest.raises(ValueError): + document_service.create_pdf(ObjectId()) + + @patch('app.services.document_service.magic.from_buffer') + def test_i_cannot_create_pdf_when_file_content_missing( + self, + mock_magic, + document_service, + sample_file_bytes + ): + """Test behavior when file content doesn't exist.""" + # Setup + mock_magic.return_value = "text/plain" + + # Create a document + created_doc = document_service.create_document( + "/test/test.txt", + sample_file_bytes, + "utf-8" + ) + + # Simulate missing content by removing file + file_path = document_service._get_document_path(created_doc.file_hash) + os.remove(file_path) + + # Execute + result = document_service.create_pdf(created_doc.id) + + # Verify + assert result is False diff --git a/tests/services/test_job_service.py b/tests/services/test_job_service.py index 5307ab9..c404470 100644 --- a/tests/services/test_job_service.py +++ b/tests/services/test_job_service.py @@ -417,6 +417,25 @@ class TestUpdateStatus: # Verify exception details assert exc_info.value.current_status == ProcessingStatus.FAILED assert exc_info.value.target_status == ProcessingStatus.FAILED + + def test_i_can_update_job_status( + self, + job_service, + sample_document_id, + sample_task_id + ): + """Test that a job's status can be updated directly to an arbitrary value.""" + # Create and start a job + created_job = 
job_service.create_job(sample_document_id, sample_task_id) + job_service.mark_job_as_started(created_job.id) + + # Execute without error message + result = job_service.update_job_status(created_job.id, ProcessingStatus.SAVING_OBJECT) + + # Verify status transition + assert result is not None + assert result.status == ProcessingStatus.SAVING_OBJECT + assert result.error_message is None class TestDeleteJob: diff --git a/tests/common/test_pdf_converter.py b/tests/utils/test_pdf_converter.py similarity index 66% rename from tests/common/test_pdf_converter.py rename to tests/utils/test_pdf_converter.py index a5718fd..04a56a6 100644 --- a/tests/common/test_pdf_converter.py +++ b/tests/utils/test_pdf_converter.py @@ -4,7 +4,7 @@ from pathlib import Path import pytest -from tasks.common.pdf_converter import TextToPdfConverter, ImageToPdfConverter, WordToPdfConverter +from app.utils.pdf_converter import TextToPdfConverter, ImageToPdfConverter, WordToPdfConverter @pytest.fixture @@ -20,10 +20,10 @@ def test_i_can_convert_text_to_pdf(temp_dir): input_txt.write_text("Hello World!\nThis is a test.") converter = TextToPdfConverter(str(input_txt), output_dir=temp_dir) - output_pdf = converter.convert() + converter.convert() - assert Path(output_pdf).exists() - assert output_pdf.endswith(".pdf") + assert Path(converter.output_path).exists() + assert str(converter.output_path).endswith(".pdf") def test_i_can_convert_image_to_pdf(temp_dir): @@ -34,10 +34,10 @@ def test_i_can_convert_image_to_pdf(temp_dir): image.save(input_img) converter = ImageToPdfConverter(str(input_img), output_dir=temp_dir) - output_pdf = converter.convert() + converter.convert() - assert Path(output_pdf).exists() - assert output_pdf.endswith(".pdf") + assert Path(converter.output_path).exists() + assert str(converter.output_path).endswith(".pdf") def test_i_can_convert_word_to_pdf(temp_dir): @@ -49,7 +49,7 @@ def test_i_can_convert_word_to_pdf(temp_dir): doc.save(input_docx) converter = 
WordToPdfConverter(str(input_docx), output_dir=temp_dir) - output_pdf = converter.convert() + converter.convert() - assert Path(output_pdf).exists() - assert output_pdf.endswith(".pdf") + assert Path(converter.output_path).exists() + assert str(converter.output_path).endswith(".pdf")