I can put a new file and create the associated pdf

2025-10-05 23:54:59 +02:00
parent bd52f2d296
commit 8ae9754fde
14 changed files with 376 additions and 45 deletions
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -40,6 +40,8 @@ services:
      - ./src/worker/tasks:/app/tasks          # <- Added: shared access to worker tasks
      - ./volumes/watched_files:/watched_files
      - ./volumes/objects:/objects
+      - ./volumes/errors:/errors
+      - ./volumes/ignored:/ignored
    depends_on:
      - redis
      - mongodb
@@ -62,6 +64,8 @@ services:
      - ./src/file-processor/app:/app/app     # <- Added: shared access file-processor app
      - ./volumes/watched_files:/watched_files
      - ./volumes/objects:/objects
+      - ./volumes/errors:/errors
+      - ./volumes/ignored:/ignored
    depends_on:
      - redis
      - mongodb
--- a/src/file-processor/app/config/settings.py
+++ b/src/file-processor/app/config/settings.py
@@ -106,3 +106,13 @@ def get_watch_folder() -> str:
 def get_temp_folder() -> str:
  """Directory to store temporary files"""
  return os.getenv("TEMP_DIRECTORY", "/tmp")
+
+
+def get_errors_folder() -> str:
+  """Directory where files whose processing failed are moved"""
+  return os.getenv("ERRORS_DIRECTORY", "/errors")
+
+
+def get_ignored_folder() -> str:
+  """Directory where ignored files (unsupported type or already known) are moved"""
+  return os.getenv("IGNORED_DIRECTORY", "/ignored")
--- a/src/file-processor/app/database/repositories/document_repository.py
+++ b/src/file-processor/app/database/repositories/document_repository.py
@@ -130,6 +130,47 @@ class FileDocumentRepository:
    except PyMongoError:
      return None
  
+  def find_document_with_pdf_hash(self, file_hash: str) -> Optional[FileDocument]:
+    """
+    Find file document by file hash with a pdf_file_hash set (not None).
+
+    Args:
+        file_hash (str): SHA256 hash of file content
+
+    Returns:
+        FileDocument or None: File document if found, None otherwise
+    """
+    try:
+      file_doc = self.collection.find_one({"file_hash": file_hash,
+                                           "pdf_file_hash": {"$ne": None}})
+      if file_doc:
+        return FileDocument(**file_doc)
+      return None
+    
+    except PyMongoError:
+      return None
+  
+  def find_same_document(self, filename: str, file_hash: str):
+    """
+    Find document with the same file_name and the same file hash
+
+    Args:
+        filename (str): Value to match against the document's filename field
+        file_hash (str): SHA256 hash of file content
+
+    Returns:
+        FileDocument or None: File document if found, None otherwise
+    """
+    try:
+      file_doc = self.collection.find_one({"file_hash": file_hash,
+                                           "filename": filename})
+      if file_doc:
+        return FileDocument(**file_doc)
+      return None
+    
+    except PyMongoError:
+      return None
+  
  def find_document_by_filepath(self, filepath: str) -> Optional[FileDocument]:
    """
    Find file document by exact filepath.
--- a/src/file-processor/app/file_watcher.py
+++ b/src/file-processor/app/file_watcher.py
@@ -30,7 +30,7 @@ class DocumentFileEventHandler(FileSystemEventHandler):
  dispatching Celery tasks, and managing processing jobs.
  """
  
-  SUPPORTED_EXTENSIONS = {'.txt', '.pdf', '.docx'}
+  SUPPORTED_EXTENSIONS = {'.txt', '.pdf', '.docx', '.jpg', '.png', '.jpeg'}
  
  def __init__(self, document_service: DocumentService, job_service: JobService):
    """
@@ -59,6 +59,7 @@ class DocumentFileEventHandler(FileSystemEventHandler):
    
    if file_extension not in self.SUPPORTED_EXTENSIONS:
      logger.info(f"Ignoring unsupported file type: {filepath}")
+      self.document_service.move_to_ignored(filepath, "unsupported file type")
      return
    
    logger.info(f"Processing new file: {filepath}")
--- a/src/file-processor/app/models/document.py
+++ b/src/file-processor/app/models/document.py
@@ -49,6 +49,7 @@ class FileDocument(BaseModel):
  metadata: Dict[str, Any] = Field(default_factory=dict, description="File-specific metadata")
  detected_at: Optional[datetime] = Field(default=None, description="Timestamp when file was detected")
  file_hash: Optional[str] = Field(default=None, description="SHA256 hash of file content")
+  pdf_file_hash: Optional[str] = Field(default=None, description="SHA256 hash of the associated pdf file content")
  encoding: str = Field(default="utf-8", description="Character encoding for text files")
  file_size: int = Field(..., ge=0, description="File size in bytes")
  mime_type: str = Field(..., description="MIME type detected")
--- a/src/file-processor/app/services/document_service.py
+++ b/src/file-processor/app/services/document_service.py
@@ -6,7 +6,9 @@ while maintaining data consistency through MongoDB transactions.
 """

 import hashlib
+import logging
 import os
+import shutil
 from datetime import datetime
 from pathlib import Path
 from typing import List, Optional, Dict, Any
@@ -14,13 +16,16 @@ from typing import List, Optional, Dict, Any
 import magic
 from pymongo.errors import PyMongoError

-from app.config.settings import get_objects_folder
+from app.config.settings import get_objects_folder, get_temp_folder, get_errors_folder, get_ignored_folder
 from app.database.repositories.document_repository import FileDocumentRepository
 from app.models.document import (
  FileDocument,
  FileType,
 )
 from app.models.types import PyObjectId
+from app.utils.pdf_converter import convert_to_pdf
+
+logger = logging.getLogger(__name__)


 class DocumentService:
@@ -31,7 +36,11 @@ class DocumentService:
  and their content while ensuring data consistency through transactions.
  """
  
-  def __init__(self, database, objects_folder: str = None):
+  def __init__(self, database,
+               objects_folder: str = None,
+               temp_folder: str = None,
+               errors_folder: str = None,
+               ignored_folder: str = None):
    """
    Initialize the document service with repository dependencies.
    
@@ -43,6 +52,9 @@ class DocumentService:
    self.db = database
    self.document_repository = FileDocumentRepository(self.db)
    self.objects_folder = objects_folder or get_objects_folder()
+    self.temp_folder = temp_folder or get_temp_folder()
+    self.errors_folder = errors_folder or get_errors_folder()
+    self.ignored_folder = ignored_folder or get_ignored_folder()
  
  def initialize(self):
    self.document_repository.initialize()
@@ -117,6 +129,39 @@ class DocumentService:
    
    return path.read_bytes()
  
+  @staticmethod
+  def _get_safe_path(file_path):
+    """
+    If the path already exists, add a suffix to the filename.
+    Increment the suffix until a safe path is found.
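+    :param file_path: Desired path, checked for an existing file or directory
+    :return: file_path unchanged if it is free, otherwise a sibling path named "<stem>_<n><suffix>" using the first free n >= 1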
+    """
+    path = Path(file_path)
+    
+    # If the path doesn't exist, return it as is
+    if not path.exists():
+      return file_path
+    
+    # Split the filename and extension
+    stem = path.stem
+    suffix = path.suffix
+    directory = path.parent
+    
+    # Try incrementing numbers until a unique path is found
+    counter = 1
+    while True:
+      # Create new filename with counter
+      new_filename = f"{stem}_{counter}{suffix}"
+      new_path = os.path.join(directory, new_filename)
+      
+      # Check if this new path exists
+      if not os.path.exists(new_path):
+        return new_path
+      
+      # Increment counter for next attempt
+      counter += 1
+  
  def _get_document_path(self, file_hash):
    """

@@ -125,6 +170,9 @@ class DocumentService:
    """
    return os.path.join(self.objects_folder, file_hash[:24], file_hash)
  
+  def exists(self, file_hash):
+    return os.path.exists(self._get_document_path(file_hash))
+  
  def save_content_if_needed(self, file_hash, content: bytes):
    target_path = self._get_document_path(file_hash)
    if os.path.exists(target_path):
@@ -136,6 +184,18 @@ class DocumentService:
    with open(target_path, "wb") as f:
      f.write(content)
  
+  def move_to_errors(self, document_id, file_path):
+    logger.info(f"Moving file {file_path} to error folder")
+    error_file_name = f"{document_id}_{os.path.basename(file_path)}"
+    error_file_path = self._get_safe_path(os.path.join(self.errors_folder, error_file_name))
+    shutil.move(file_path, error_file_path)
+  
+  def move_to_ignored(self, file_path, reason="Unknown"):
+    logger.info(f"Moving file {file_path} to ignored folder")
+    ignored_file_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + f"_### {reason} ###_" + os.path.basename(file_path)
+    ignored_file_path = self._get_safe_path(os.path.join(self.ignored_folder, ignored_file_name))
+    shutil.move(file_path, ignored_file_path)
+  
  def create_document(
      self,
      file_path: str,
@@ -171,7 +231,15 @@ class DocumentService:
    detected_at = datetime.now()
    
    try:
+      logger.info(f"Creating Document for {file_path}")
+      # Skip the document if it already exists
+      same_document = self.document_repository.find_same_document(filename, file_hash)
+      if same_document is not None:
+        logger.info(f"  Document with same hash already exists. Skipping...")
+        self.move_to_ignored(file_path, f"already exists ({same_document.id})")
+      
      self.save_content_if_needed(file_hash, file_bytes)
+      logger.info(f"  Saved content to {self._get_document_path(file_hash)}")
      
      # Create FileDocument
      file_data = FileDocument(
@@ -188,6 +256,7 @@ class DocumentService:
      )
      
      created_file = self.document_repository.create_document(file_data)
+      logger.info(f"  Created document with id '{created_file.id}'")
      
      return created_file
    
@@ -195,6 +264,50 @@ class DocumentService:
      # Transaction will automatically rollback if supported
      raise PyMongoError(f"Failed to create document: {str(e)}")
  
+  def create_pdf(self, document_id: PyObjectId):
+    """
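+    A controlled pdf version is created for every file, for standard visualization and actions; the pdf of another document with the same file hash is reused when it exists
+    :return: True when a pdf hash was assigned to the document, False if no stored content is found for its file hash (raises ValueError if the document is unknown)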
+    """
+    logger.info(f"Creating PDF document for {document_id}")
+    document = self.get_document_by_id(document_id)
+    if document is None:
+      logger.error(f"  Document not found")
+      raise ValueError(f"Document {document_id} not found")
+    
+    # try to find another document that has the same hash
+    document_with_same_hash = self.get_document_with_pdf_hash(document.file_hash)
+    
+    # the pdf will be created only if it does not exist yet
+    if (document_with_same_hash is not None and
+        document_with_same_hash.pdf_file_hash and
+        self.exists(document_with_same_hash.pdf_file_hash)):
+      logger.info(f"Found document with same hash. Will use pdf {document_with_same_hash.pdf_file_hash}")
+      self.update_document(document_id, {"pdf_file_hash": document_with_same_hash.pdf_file_hash})
+      return True
+    
+    # get the content of the file
+    logger.info(f"  No document with same hash found and valid pdf found. Will create new pdf")
+    file_bytes = self.get_document_content_by_hash(document.file_hash)
+    if file_bytes is None:
+      logger.error(f"Content for document {document_id} not found. hash = {document.file_hash}.")
+      return False
+    
+    # create the pdf file
+    temp_pdf_file = convert_to_pdf(self._get_document_path(document.file_hash), self.temp_folder)
+    pdf_file_hash = self._calculate_file_hash(self._read_file_bytes(temp_pdf_file))
+    self.save_content_if_needed(pdf_file_hash, self._read_file_bytes(temp_pdf_file))
+    logger.info(f"  Created new pdf file with hash {pdf_file_hash}")
+    
+    # remove the temporary file
+    os.remove(temp_pdf_file)
+    logger.info(f"  Removed temporary pdf file {temp_pdf_file}")
+    
+    # update the document
+    self.update_document(document_id, {"pdf_file_hash": pdf_file_hash})
+    
+    return True
+  
  def get_document_by_id(self, document_id: PyObjectId) -> Optional[FileDocument]:
    """
    Retrieve a document by its ID.
@@ -219,6 +332,9 @@ class DocumentService:
    """
    return self.document_repository.find_document_by_hash(file_hash)
  
+  def get_document_with_pdf_hash(self, file_hash) -> Optional[FileDocument]:
+    return self.document_repository.find_document_with_pdf_hash(file_hash)
+  
  def get_document_by_filepath(self, filepath: str) -> Optional[FileDocument]:
    """
    Retrieve a document by its file path.
--- a/src/file-processor/app/services/job_service.py
+++ b/src/file-processor/app/services/job_service.py
@@ -111,7 +111,9 @@ class JobService:
    current_job = self.repository.find_job_by_id(job_id)
    
    # Validate status transition
-    if current_job.status != ProcessingStatus.PROCESSING:
+    if current_job.status in (ProcessingStatus.PENDING,
+                              ProcessingStatus.COMPLETED,
+                              ProcessingStatus.FAILED):
      raise InvalidStatusTransitionError(current_job.status, ProcessingStatus.COMPLETED)
    
    # Update status
@@ -141,7 +143,7 @@ class JobService:
    current_job = self.repository.find_job_by_id(job_id)
    
    # Validate status transition
-    if current_job.status != ProcessingStatus.PROCESSING:
+    if current_job.status in (ProcessingStatus.PENDING, ProcessingStatus.COMPLETED, ProcessingStatus.FAILED):
      raise InvalidStatusTransitionError(current_job.status, ProcessingStatus.FAILED)
    
    # Update status with error message
@@ -151,6 +153,11 @@ class JobService:
      error_message
    )
  
+  def update_job_status(self, job_id: PyObjectId,
+                        status: ProcessingStatus,
+                        error_message: str = None) -> ProcessingJob:
+    return self.repository.update_job_status(job_id, status, error_message)
+  
  def delete_job(self, job_id: PyObjectId) -> bool:
    """
    Delete a job from the database.
--- a/src/file-processor/app/utils/pdf_converter.py
+++ b/src/file-processor/app/utils/pdf_converter.py
--- a/src/worker/tasks/common/converter_utils.py
+++ b/src/worker/tasks/common/converter_utils.py
@@ -20,12 +20,19 @@ def detect_file_type(file_path: str) -> str:
      UnsupportedFileTypeError: If file type is not supported.
  """
  mime = magic.from_file(file_path, mime=True)
+  extension = Path(file_path).suffix
  if mime.startswith("text/"):
    return "text"
  elif mime.startswith("image/"):
    return "image"
  elif mime in ("application/vnd.openxmlformats-officedocument.wordprocessingml.document",):
    return "word"
+  elif mime == "application/pdf":
+    return "pdf"
+  elif mime == "application/vnd.ms-powerpoint":
+    return "powerpoint"
+  elif mime == "application/octet-stream" and extension in (".jpg", ".jpeg", ".png", ".gif"):
+    return "image"
  else:
    raise UnsupportedFileTypeError(f"Unsupported file type: {mime}")

--- a/src/worker/tasks/document_processing.py
+++ b/src/worker/tasks/document_processing.py
@@ -6,14 +6,14 @@ and update processing job statuses throughout the task lifecycle.
 """

 import logging
+import os
 from typing import Any, Dict

 from app.config import settings
 from app.database.connection import get_database
+from app.models.job import ProcessingStatus
 from app.services.document_service import DocumentService
 from app.services.job_service import JobService
-from tasks.common.document_utils import save_as_object
-from tasks.common.pdf_converter import convert_to_pdf
 from tasks.main import celery_app

 logger = logging.getLogger(__name__)
@@ -26,7 +26,8 @@ def get_services():
  return document_service, job_service


-@celery_app.task(bind=True, autoretry_for=(Exception,), retry_kwargs={'max_retries': 3, 'countdown': 60})
+#@celery_app.task(bind=True, autoretry_for=(Exception,), retry_kwargs={'max_retries': 3, 'countdown': 60})
+@celery_app.task(bind=True)
 def process_document(self, filepath: str) -> Dict[str, Any]:
  """
  Process a document file and extract its content.
@@ -46,41 +47,31 @@ def process_document(self, filepath: str) -> Dict[str, Any]:
  Raises:
      Exception: Any processing error (will trigger retry)
  """
-  task_id = self.request.id 
+  task_id = self.request.id
  logger.info(f"Starting document processing task {task_id} for file: {filepath}")
  
  # get services
  document_service, job_service = get_services()
  
  job = None
+  document = None
  try:
    # Step 1: Create the document and a new job record for the document
    document = document_service.create_document(filepath)
    job = job_service.create_job(task_id=task_id, document_id=document.id)
    job_service.mark_job_as_started(job_id=job.id)
-    logger.info(f"Task {task_id} created for document {document.id} with file path: {filepath} and job id: {job.id}")
+    logger.info(f"Task {task_id} created for document {document.id} from file path: {filepath} and job id: {job.id}")
    
-    logger.info(f"Job {task_id} marked as PROCESSING")
-
-    raw_file_hash = save_as_object(filepath)
-    logger.info(f"Job {task_id} saved document as object: {raw_file_hash}")
+    logger.info(f"Task {task_id} : Creating associated PDF")
+    job_service.update_job_status(job_id=job.id, status=ProcessingStatus.SAVING_PDF)
+    document_service.create_pdf(document.id)
    
-    # Step 4: Create the pdf version of the document
-    pdf_file_hash = convert_to_pdf(filepath, raw_file_hash)
-    logger.info(f"Job {task_id} saved PDF with hash: {pdf_file_hash}")
-    
-    
-    
-    # Step 3: Mark job as started
-    
-    # Step 4: Create the pdf version of the document
-    pdf_file_path = convert_to_pdf(filepath, settings.get_temp_folder())
-    digest = save_as_object(pdf_file_path)
-    logger.info(f"Job {task_id} internal PDF file created: {digest}")
+    # remove the file from the watch folder
+    os.remove(filepath)
    
    # Step x: Mark job as completed
    job_service.mark_job_as_completed(job_id=job.id)
-    logger.info(f"Job {task_id} marked as COMPLETED")
+    logger.info(f"Task {task_id} marked as COMPLETED")
    
    return {
        "task_id": task_id,
@@ -99,6 +90,11 @@ def process_document(self, filepath: str) -> Dict[str, Any]:
        logger.info(f"Job {task_id} marked as FAILED")
      else:
        logger.error(f"Failed to process {filepath}. error = {str(e)}")
+      
+      if document is not None:
+        document_service.move_to_errors(document.id, filepath)
+        logger.info(f"Moved file {filepath} to errors/{document.id}")
+    
    except Exception as job_error:
      logger.error(f"Failed to update job status for task {task_id}: {str(job_error)}")
    
--- a/src/worker/tasks/main.py
+++ b/src/worker/tasks/main.py
@@ -41,15 +41,10 @@ celery_app.conf.update(

 def global_init(**kwargs):
  """Initialize global variables."""
-  logger.info(f"{'*' * 20}")
+  logger.info(f"{'*' * 45}")
  logger.info(f"{'--' * 5}" + " Starting MyDocManager worker " + f"{'--' * 5}")
-  logger.info(f"{'*' * 20}")
-  tmp_folder = settings.get_temp_folder()
-  if not os.path.exists(tmp_folder):
-    logger.info(f"Creating temporary folder: {tmp_folder}")
-    os.makedirs(tmp_folder)
-  else:
-    logger.info(f"Temporary folder already exists: {os.path.abspath(tmp_folder)}")
+  logger.info(f"{'*' * 45}")
+

 global_init()

--- a/tests/services/test_document_service.py
+++ b/tests/services/test_document_service.py
@@ -568,3 +568,137 @@ class TestFileTypeDetection:
    """Test unsupported file type raises ValueError."""
    with pytest.raises(ValueError, match="Unsupported file type"):
      document_service._detect_file_type("/path/to/document.xyz")
+
+
+class TestCreatePdf:
+  """Tests for create_pdf method."""
+  
+  @patch('app.services.document_service.convert_to_pdf')
+  @patch('app.services.document_service.magic.from_buffer')
+  def test_i_can_create_pdf_successfully(
+      self,
+      mock_magic,
+      mock_convert_to_pdf,
+      document_service,
+      sample_file_bytes
+  ):
+    """Test creating PDF from an existing document."""
+    # Setup
+    mock_magic.return_value = "text/plain"
+    
+    # Create a document first
+    created_doc = document_service.create_document(
+      "/test/test.txt",
+      sample_file_bytes,
+      "utf-8"
+    )
+    
+    # Mock the PDF conversion
+    pdf_path = os.path.join(document_service.temp_folder, "converted.pdf")
+    mock_convert_to_pdf.return_value = pdf_path
+    
+    # Write a sample PDF file that the conversion would create
+    pdf_content = b"This is PDF content"
+    os.makedirs(os.path.dirname(pdf_path), exist_ok=True)
+    with open(pdf_path, "wb") as f:
+      f.write(pdf_content)
+    
+    # Execute
+    result = document_service.create_pdf(created_doc.id)
+    
+    # Verify
+    assert result is True
+    
+    # Get the updated document
+    updated_doc = document_service.get_document_by_id(created_doc.id)
+    assert updated_doc.pdf_file_hash is not None
+    
+    # Verify the PDF content was saved
+    pdf_hash = document_service._calculate_file_hash(pdf_content)
+    assert updated_doc.pdf_file_hash == pdf_hash
+    
+    # Verify convert_to_pdf was called with correct arguments
+    doc_path = document_service._get_document_path(created_doc.file_hash)
+    mock_convert_to_pdf.assert_called_once_with(doc_path, document_service.temp_folder)
+    
+    # Verify content exists on disk
+    validate_file_saved(document_service, pdf_hash, pdf_content)
+    
+    # Verify PDF hash was added to document
+    updated_doc = document_service.get_document_by_id(created_doc.id)
+    pdf_hash = document_service._calculate_file_hash(pdf_content)
+    assert updated_doc.pdf_file_hash == pdf_hash
+  
+  @patch('app.services.document_service.convert_to_pdf')
+  @patch('app.services.document_service.magic.from_buffer')
+  def test_i_can_reuse_existing_pdf(
+      self,
+      mock_magic,
+      mock_convert_to_pdf,
+      document_service,
+      sample_file_bytes
+  ):
+    """Test that if PDF already exists, it doesn't recreate it."""
+    # Setup
+    mock_magic.return_value = "text/plain"
+    
+    # Create a document first
+    created_doc = document_service.create_document(
+      "/test/test.txt",
+      sample_file_bytes,
+      "utf-8"
+    )
+    
+    # Create a fake PDF file and update the document
+    pdf_content = b"This is PDF content"
+    pdf_hash = document_service._calculate_file_hash(pdf_content)
+    document_service.save_content_if_needed(pdf_hash, pdf_content)
+    document_service.update_document(created_doc.id, {"pdf_file_hash": pdf_hash})
+    
+    # Execute
+    result = document_service.create_pdf(created_doc.id)
+    
+    # Verify
+    assert result is True
+    
+    # Verify convert_to_pdf was NOT called
+    mock_convert_to_pdf.assert_not_called()
+  
+  def test_i_cannot_create_pdf_for_nonexistent_document(
+      self,
+      document_service
+  ):
+    """Test that create_pdf raises ValueError when the document ID doesn't exist."""
+    # create_pdf raises for an unknown id instead of returning False,
+    # so the worker's failure path is triggered rather than silently skipped
+    unknown_id = ObjectId()
+    with pytest.raises(ValueError, match="not found"):
+      document_service.create_pdf(unknown_id)
+  
+  @patch('app.services.document_service.magic.from_buffer')
+  def test_i_cannot_create_pdf_when_file_content_missing(
+      self,
+      mock_magic,
+      document_service,
+      sample_file_bytes
+  ):
+    """Test behavior when file content doesn't exist."""
+    # Setup
+    mock_magic.return_value = "text/plain"
+    
+    # Create a document
+    created_doc = document_service.create_document(
+      "/test/test.txt",
+      sample_file_bytes,
+      "utf-8"
+    )
+    
+    # Simulate missing content by removing file
+    file_path = document_service._get_document_path(created_doc.file_hash)
+    os.remove(file_path)
+    
+    # Execute
+    result = document_service.create_pdf(created_doc.id)
+    
+    # Verify
+    assert result is False
--- a/tests/services/test_job_service.py
+++ b/tests/services/test_job_service.py
@@ -417,6 +417,25 @@ class TestUpdateStatus:
    # Verify exception details
    assert exc_info.value.current_status == ProcessingStatus.FAILED
    assert exc_info.value.target_status == ProcessingStatus.FAILED
+  
+  def test_i_can_update_job_status(
+      self,
+      job_service,
+      sample_document_id,
+      sample_task_id
+  ):
+    """Test that the status of a started job can be set directly."""
+    # Create and start a job
+    created_job = job_service.create_job(sample_document_id, sample_task_id)
+    job_service.mark_job_as_started(created_job.id)
+    
+    # Execute without error message
+    result = job_service.update_job_status(created_job.id, ProcessingStatus.SAVING_OBJECT)
+    
+    # Verify status transition
+    assert result is not None
+    assert result.status == ProcessingStatus.SAVING_OBJECT
+    assert result.error_message is None


 class TestDeleteJob:
--- a/tests/common/test_pdf_converter.py
+++ b/tests/common/test_pdf_converter.py
@@ -4,7 +4,7 @@ from pathlib import Path

 import pytest

-from tasks.common.pdf_converter import TextToPdfConverter, ImageToPdfConverter, WordToPdfConverter
+from app.utils.pdf_converter import TextToPdfConverter, ImageToPdfConverter, WordToPdfConverter


@pytest.fixture
@@ -20,10 +20,10 @@ def test_i_can_convert_text_to_pdf(temp_dir):
  input_txt.write_text("Hello World!\nThis is a test.")
  
  converter = TextToPdfConverter(str(input_txt), output_dir=temp_dir)
-  output_pdf = converter.convert()
+  converter.convert()
  
-  assert Path(output_pdf).exists()
-  assert output_pdf.endswith(".pdf")
+  assert Path(converter.output_path).exists()
+  assert str(converter.output_path).endswith(".pdf")


 def test_i_can_convert_image_to_pdf(temp_dir):
@@ -34,10 +34,10 @@ def test_i_can_convert_image_to_pdf(temp_dir):
  image.save(input_img)
  
  converter = ImageToPdfConverter(str(input_img), output_dir=temp_dir)
-  output_pdf = converter.convert()
+  converter.convert()
  
-  assert Path(output_pdf).exists()
-  assert output_pdf.endswith(".pdf")
+  assert Path(converter.output_path).exists()
+  assert str(converter.output_path).endswith(".pdf")


 def test_i_can_convert_word_to_pdf(temp_dir):
@@ -49,7 +49,7 @@ def test_i_can_convert_word_to_pdf(temp_dir):
  doc.save(input_docx)
  
  converter = WordToPdfConverter(str(input_docx), output_dir=temp_dir)
-  output_pdf = converter.convert()
+  converter.convert()
  
-  assert Path(output_pdf).exists()
-  assert output_pdf.endswith(".pdf")
+  assert Path(converter.output_path).exists()
+  assert str(converter.output_path).endswith(".pdf")