I can put a new file and create the associated pdf

This commit is contained in:
2025-10-05 23:54:59 +02:00
parent bd52f2d296
commit 8ae9754fde
14 changed files with 376 additions and 45 deletions

View File

@@ -40,6 +40,8 @@ services:
- ./src/worker/tasks:/app/tasks # <- Added: shared access to worker tasks - ./src/worker/tasks:/app/tasks # <- Added: shared access to worker tasks
- ./volumes/watched_files:/watched_files - ./volumes/watched_files:/watched_files
- ./volumes/objects:/objects - ./volumes/objects:/objects
- ./volumes/errors:/errors
- ./volumes/ignored:/ignored
depends_on: depends_on:
- redis - redis
- mongodb - mongodb
@@ -62,6 +64,8 @@ services:
- ./src/file-processor/app:/app/app # <- Added: shared access file-processor app - ./src/file-processor/app:/app/app # <- Added: shared access file-processor app
- ./volumes/watched_files:/watched_files - ./volumes/watched_files:/watched_files
- ./volumes/objects:/objects - ./volumes/objects:/objects
- ./volumes/errors:/errors
- ./volumes/ignored:/ignored
depends_on: depends_on:
- redis - redis
- mongodb - mongodb

View File

@@ -106,3 +106,13 @@ def get_watch_folder() -> str:
def get_temp_folder() -> str: def get_temp_folder() -> str:
"""Directory to store temporary files""" """Directory to store temporary files"""
return os.getenv("TEMP_DIRECTORY", "/tmp") return os.getenv("TEMP_DIRECTORY", "/tmp")
def get_errors_folder() -> str:
    """Directory where files that failed processing are moved.

    Reads the ``ERRORS_DIRECTORY`` environment variable and falls back to
    ``/errors`` (the path mounted in docker-compose) when it is not set.
    """
    return os.getenv("ERRORS_DIRECTORY", "/errors")
def get_ignored_folder() -> str:
    """Directory where unsupported or duplicate files are moved.

    Reads the ``IGNORED_DIRECTORY`` environment variable and falls back to
    ``/ignored`` (the path mounted in docker-compose) when it is not set.
    """
    return os.getenv("IGNORED_DIRECTORY", "/ignored")

View File

@@ -130,6 +130,47 @@ class FileDocumentRepository:
except PyMongoError: except PyMongoError:
return None return None
def find_document_with_pdf_hash(self, file_hash: str) -> Optional[FileDocument]:
    """
    Find a file document matching ``file_hash`` whose ``pdf_file_hash`` is set (not None).

    Args:
        file_hash (str): SHA256 hash of file content

    Returns:
        FileDocument or None: File document if found, None otherwise
    """
    query = {"file_hash": file_hash, "pdf_file_hash": {"$ne": None}}
    try:
        raw_doc = self.collection.find_one(query)
        # Only wrap into the model when Mongo actually returned a document.
        return FileDocument(**raw_doc) if raw_doc else None
    except PyMongoError:
        # Consistent with the repository's other finders: swallow driver
        # errors and report "not found".
        return None
def find_same_document(self, filename: str, file_hash: str):
    """
    Find the document that has both this filename and this file hash.

    Args:
        filename (str): original file name to match
        file_hash (str): SHA256 hash of file content

    Returns:
        FileDocument or None: File document if found, None otherwise
    """
    criteria = {"file_hash": file_hash, "filename": filename}
    try:
        raw_doc = self.collection.find_one(criteria)
        return FileDocument(**raw_doc) if raw_doc else None
    except PyMongoError:
        # Driver failures are reported as "not found", matching the
        # other finder methods in this repository.
        return None
def find_document_by_filepath(self, filepath: str) -> Optional[FileDocument]: def find_document_by_filepath(self, filepath: str) -> Optional[FileDocument]:
""" """
Find file document by exact filepath. Find file document by exact filepath.

View File

@@ -30,7 +30,7 @@ class DocumentFileEventHandler(FileSystemEventHandler):
dispatching Celery tasks, and managing processing jobs. dispatching Celery tasks, and managing processing jobs.
""" """
SUPPORTED_EXTENSIONS = {'.txt', '.pdf', '.docx'} SUPPORTED_EXTENSIONS = {'.txt', '.pdf', '.docx', '.jpg', '.png', '.jpeg'}
def __init__(self, document_service: DocumentService, job_service: JobService): def __init__(self, document_service: DocumentService, job_service: JobService):
""" """
@@ -59,6 +59,7 @@ class DocumentFileEventHandler(FileSystemEventHandler):
if file_extension not in self.SUPPORTED_EXTENSIONS: if file_extension not in self.SUPPORTED_EXTENSIONS:
logger.info(f"Ignoring unsupported file type: {filepath}") logger.info(f"Ignoring unsupported file type: {filepath}")
self.document_service.move_to_ignored(filepath, "unsupported file type")
return return
logger.info(f"Processing new file: {filepath}") logger.info(f"Processing new file: {filepath}")

View File

@@ -49,6 +49,7 @@ class FileDocument(BaseModel):
metadata: Dict[str, Any] = Field(default_factory=dict, description="File-specific metadata") metadata: Dict[str, Any] = Field(default_factory=dict, description="File-specific metadata")
detected_at: Optional[datetime] = Field(default=None, description="Timestamp when file was detected") detected_at: Optional[datetime] = Field(default=None, description="Timestamp when file was detected")
file_hash: Optional[str] = Field(default=None, description="SHA256 hash of file content") file_hash: Optional[str] = Field(default=None, description="SHA256 hash of file content")
pdf_file_hash: Optional[str] = Field(default=None, description="SHA256 hash of the associated pdf file content")
encoding: str = Field(default="utf-8", description="Character encoding for text files") encoding: str = Field(default="utf-8", description="Character encoding for text files")
file_size: int = Field(..., ge=0, description="File size in bytes") file_size: int = Field(..., ge=0, description="File size in bytes")
mime_type: str = Field(..., description="MIME type detected") mime_type: str = Field(..., description="MIME type detected")

View File

@@ -6,7 +6,9 @@ while maintaining data consistency through MongoDB transactions.
""" """
import hashlib import hashlib
import logging
import os import os
import shutil
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
from typing import List, Optional, Dict, Any from typing import List, Optional, Dict, Any
@@ -14,13 +16,16 @@ from typing import List, Optional, Dict, Any
import magic import magic
from pymongo.errors import PyMongoError from pymongo.errors import PyMongoError
from app.config.settings import get_objects_folder from app.config.settings import get_objects_folder, get_temp_folder, get_errors_folder, get_ignored_folder
from app.database.repositories.document_repository import FileDocumentRepository from app.database.repositories.document_repository import FileDocumentRepository
from app.models.document import ( from app.models.document import (
FileDocument, FileDocument,
FileType, FileType,
) )
from app.models.types import PyObjectId from app.models.types import PyObjectId
from app.utils.pdf_converter import convert_to_pdf
logger = logging.getLogger(__name__)
class DocumentService: class DocumentService:
@@ -31,7 +36,11 @@ class DocumentService:
and their content while ensuring data consistency through transactions. and their content while ensuring data consistency through transactions.
""" """
def __init__(self, database, objects_folder: str = None): def __init__(self, database,
objects_folder: str = None,
temp_folder: str = None,
errors_folder: str = None,
ignored_folder: str = None):
""" """
Initialize the document service with repository dependencies. Initialize the document service with repository dependencies.
@@ -43,6 +52,9 @@ class DocumentService:
self.db = database self.db = database
self.document_repository = FileDocumentRepository(self.db) self.document_repository = FileDocumentRepository(self.db)
self.objects_folder = objects_folder or get_objects_folder() self.objects_folder = objects_folder or get_objects_folder()
self.temp_folder = temp_folder or get_temp_folder()
self.errors_folder = errors_folder or get_errors_folder()
self.ignored_folder = ignored_folder or get_ignored_folder()
def initialize(self): def initialize(self):
self.document_repository.initialize() self.document_repository.initialize()
@@ -117,6 +129,39 @@ class DocumentService:
return path.read_bytes() return path.read_bytes()
@staticmethod
def _get_safe_path(file_path):
"""
If the path already exists, add a suffix to the filename.
Increment the suffix until a safe path is found.
:param file_path:
:return:
"""
path = Path(file_path)
# If the path doesn't exist, return it as is
if not path.exists():
return file_path
# Split the filename and extension
stem = path.stem
suffix = path.suffix
directory = path.parent
# Try incrementing numbers until a unique path is found
counter = 1
while True:
# Create new filename with counter
new_filename = f"{stem}_{counter}{suffix}"
new_path = os.path.join(directory, new_filename)
# Check if this new path exists
if not os.path.exists(new_path):
return new_path
# Increment counter for next attempt
counter += 1
def _get_document_path(self, file_hash): def _get_document_path(self, file_hash):
""" """
@@ -125,6 +170,9 @@ class DocumentService:
""" """
return os.path.join(self.objects_folder, file_hash[:24], file_hash) return os.path.join(self.objects_folder, file_hash[:24], file_hash)
def exists(self, file_hash):
    """Return True when the content for ``file_hash`` is already stored on disk."""
    return Path(self._get_document_path(file_hash)).exists()
def save_content_if_needed(self, file_hash, content: bytes): def save_content_if_needed(self, file_hash, content: bytes):
target_path = self._get_document_path(file_hash) target_path = self._get_document_path(file_hash)
if os.path.exists(target_path): if os.path.exists(target_path):
@@ -136,6 +184,18 @@ class DocumentService:
with open(target_path, "wb") as f: with open(target_path, "wb") as f:
f.write(content) f.write(content)
def move_to_errors(self, document_id, file_path):
    """Relocate ``file_path`` into the errors folder, prefixing the name with the document id."""
    logger.info(f"Moving file {file_path} to error folder")
    basename = os.path.basename(file_path)
    # Prefix with the document id so the stored file can be traced back,
    # and let _get_safe_path avoid clobbering an earlier error file.
    destination = self._get_safe_path(os.path.join(self.errors_folder, f"{document_id}_{basename}"))
    shutil.move(file_path, destination)
def move_to_ignored(self, file_path, reason="Unknown"):
    """Relocate ``file_path`` into the ignored folder, tagging the name with a timestamp and reason."""
    logger.info(f"Moving file {file_path} to ignored folder")
    stamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    # Timestamp + reason tag make repeated ignores of the same file distinguishable.
    tagged_name = stamp + f"_### {reason} ###_" + os.path.basename(file_path)
    destination = self._get_safe_path(os.path.join(self.ignored_folder, tagged_name))
    shutil.move(file_path, destination)
def create_document( def create_document(
self, self,
file_path: str, file_path: str,
@@ -171,7 +231,15 @@ class DocumentService:
detected_at = datetime.now() detected_at = datetime.now()
try: try:
logger.info(f"Creating Document for {file_path}")
# Skip the document if it already exists — NOTE(review): there is no return/raise after move_to_ignored below, so execution falls through, re-saves the content, and creates a duplicate document record even though the source file was just moved away; confirm a `return same_document` (or raise) is intended here
same_document = self.document_repository.find_same_document(filename, file_hash)
if same_document is not None:
logger.info(f" Document with same hash already exists. Skipping...")
self.move_to_ignored(file_path, f"already exists ({same_document.id})")
self.save_content_if_needed(file_hash, file_bytes) self.save_content_if_needed(file_hash, file_bytes)
logger.info(f" Saved content to {self._get_document_path(file_hash)}")
# Create FileDocument # Create FileDocument
file_data = FileDocument( file_data = FileDocument(
@@ -188,6 +256,7 @@ class DocumentService:
) )
created_file = self.document_repository.create_document(file_data) created_file = self.document_repository.create_document(file_data)
logger.info(f" Created document with id '{created_file.id}'")
return created_file return created_file
@@ -195,6 +264,50 @@ class DocumentService:
# Transaction will automatically rollback if supported # Transaction will automatically rollback if supported
raise PyMongoError(f"Failed to create document: {str(e)}") raise PyMongoError(f"Failed to create document: {str(e)}")
def create_pdf(self, document_id: PyObjectId):
    """
    Create (or reuse) the controlled PDF rendition of a document.

    Every stored file gets a PDF version for standard visualization and
    actions. If another document with the same content hash already has a
    PDF that is present on disk, that PDF is reused instead of converting
    the content again.

    :param document_id: id of the document to render as PDF
    :return: True when the document now references a PDF,
             False when the source content is missing on disk
    :raises ValueError: if no document exists for ``document_id``
    """
    logger.info(f"Creating PDF document for {document_id}")
    document = self.get_document_by_id(document_id)
    if document is None:
        logger.error(" Document not found")
        raise ValueError(f"Document {document_id} not found")

    # Try to reuse the PDF of another document sharing the same content
    # hash — but only when that PDF file is still present on disk.
    document_with_same_hash = self.get_document_with_pdf_hash(document.file_hash)
    if (document_with_same_hash is not None and
            document_with_same_hash.pdf_file_hash and
            self.exists(document_with_same_hash.pdf_file_hash)):
        logger.info(f"Found document with same hash. Will use pdf {document_with_same_hash.pdf_file_hash}")
        self.update_document(document_id, {"pdf_file_hash": document_with_same_hash.pdf_file_hash})
        return True

    # Make sure the source content exists before attempting a conversion.
    logger.info(" No document with same hash and valid pdf found. Will create new pdf")
    file_bytes = self.get_document_content_by_hash(document.file_hash)
    if file_bytes is None:
        logger.error(f"Content for document {document_id} not found. hash = {document.file_hash}.")
        return False

    # Convert in the temp folder; always remove the temporary file, even
    # if hashing or storage fails (the original leaked it on exception).
    temp_pdf_file = convert_to_pdf(self._get_document_path(document.file_hash), self.temp_folder)
    try:
        # Read the converted file once and reuse the bytes for both the
        # hash and the object store (the original read it twice).
        pdf_bytes = self._read_file_bytes(temp_pdf_file)
        pdf_file_hash = self._calculate_file_hash(pdf_bytes)
        self.save_content_if_needed(pdf_file_hash, pdf_bytes)
        logger.info(f" Created new pdf file with hash {pdf_file_hash}")
    finally:
        os.remove(temp_pdf_file)
        logger.info(f" Removed temporary pdf file {temp_pdf_file}")

    # Link the stored PDF to the document.
    self.update_document(document_id, {"pdf_file_hash": pdf_file_hash})
    return True
def get_document_by_id(self, document_id: PyObjectId) -> Optional[FileDocument]: def get_document_by_id(self, document_id: PyObjectId) -> Optional[FileDocument]:
""" """
Retrieve a document by its ID. Retrieve a document by its ID.
@@ -219,6 +332,9 @@ class DocumentService:
""" """
return self.document_repository.find_document_by_hash(file_hash) return self.document_repository.find_document_by_hash(file_hash)
def get_document_with_pdf_hash(self, file_hash) -> Optional[FileDocument]:
    """Return a document with this file hash that already has ``pdf_file_hash`` set, or None.

    Thin delegation to the repository; used to reuse an existing PDF
    instead of converting the same content again.
    """
    return self.document_repository.find_document_with_pdf_hash(file_hash)
def get_document_by_filepath(self, filepath: str) -> Optional[FileDocument]: def get_document_by_filepath(self, filepath: str) -> Optional[FileDocument]:
""" """
Retrieve a document by its file path. Retrieve a document by its file path.

View File

@@ -111,7 +111,9 @@ class JobService:
current_job = self.repository.find_job_by_id(job_id) current_job = self.repository.find_job_by_id(job_id)
# Validate status transition # Validate status transition
if current_job.status != ProcessingStatus.PROCESSING: if current_job.status in (ProcessingStatus.PENDING,
ProcessingStatus.COMPLETED,
ProcessingStatus.FAILED):
raise InvalidStatusTransitionError(current_job.status, ProcessingStatus.COMPLETED) raise InvalidStatusTransitionError(current_job.status, ProcessingStatus.COMPLETED)
# Update status # Update status
@@ -141,7 +143,7 @@ class JobService:
current_job = self.repository.find_job_by_id(job_id) current_job = self.repository.find_job_by_id(job_id)
# Validate status transition # Validate status transition
if current_job.status != ProcessingStatus.PROCESSING: if current_job.status in (ProcessingStatus.PENDING, ProcessingStatus.COMPLETED, ProcessingStatus.FAILED):
raise InvalidStatusTransitionError(current_job.status, ProcessingStatus.FAILED) raise InvalidStatusTransitionError(current_job.status, ProcessingStatus.FAILED)
# Update status with error message # Update status with error message
@@ -151,6 +153,11 @@ class JobService:
error_message error_message
) )
def update_job_status(self, job_id: PyObjectId,
                      status: ProcessingStatus,
                      error_message: str = None) -> ProcessingJob:
    """
    Set a job's status directly, without transition validation.

    Unlike mark_job_as_completed/mark_job_as_failed, this performs no
    status-transition checks — it delegates straight to the repository.

    Args:
        job_id: id of the job to update
        status: new processing status to store
        error_message: optional error detail stored alongside the status

    Returns:
        ProcessingJob: the updated job
    """
    return self.repository.update_job_status(job_id, status, error_message)
def delete_job(self, job_id: PyObjectId) -> bool: def delete_job(self, job_id: PyObjectId) -> bool:
""" """
Delete a job from the database. Delete a job from the database.

View File

@@ -20,12 +20,19 @@ def detect_file_type(file_path: str) -> str:
UnsupportedFileTypeError: If file type is not supported. UnsupportedFileTypeError: If file type is not supported.
""" """
mime = magic.from_file(file_path, mime=True) mime = magic.from_file(file_path, mime=True)
extension = Path(file_path).suffix
if mime.startswith("text/"): if mime.startswith("text/"):
return "text" return "text"
elif mime.startswith("image/"): elif mime.startswith("image/"):
return "image" return "image"
elif mime in ("application/vnd.openxmlformats-officedocument.wordprocessingml.document",): elif mime in ("application/vnd.openxmlformats-officedocument.wordprocessingml.document",):
return "word" return "word"
elif mime == "application/pdf":
return "pdf"
elif mime == "application/vnd.ms-powerpoint":
return "powerpoint"
elif mime == "application/octet-stream" and extension in (".jpg", ".jpeg", ".png", ".gif"):
return "image"
else: else:
raise UnsupportedFileTypeError(f"Unsupported file type: {mime}") raise UnsupportedFileTypeError(f"Unsupported file type: {mime}")

View File

@@ -6,14 +6,14 @@ and update processing job statuses throughout the task lifecycle.
""" """
import logging import logging
import os
from typing import Any, Dict from typing import Any, Dict
from app.config import settings from app.config import settings
from app.database.connection import get_database from app.database.connection import get_database
from app.models.job import ProcessingStatus
from app.services.document_service import DocumentService from app.services.document_service import DocumentService
from app.services.job_service import JobService from app.services.job_service import JobService
from tasks.common.document_utils import save_as_object
from tasks.common.pdf_converter import convert_to_pdf
from tasks.main import celery_app from tasks.main import celery_app
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -26,7 +26,8 @@ def get_services():
return document_service, job_service return document_service, job_service
@celery_app.task(bind=True, autoretry_for=(Exception,), retry_kwargs={'max_retries': 3, 'countdown': 60}) #@celery_app.task(bind=True, autoretry_for=(Exception,), retry_kwargs={'max_retries': 3, 'countdown': 60})
@celery_app.task(bind=True)
def process_document(self, filepath: str) -> Dict[str, Any]: def process_document(self, filepath: str) -> Dict[str, Any]:
""" """
Process a document file and extract its content. Process a document file and extract its content.
@@ -53,34 +54,24 @@ def process_document(self, filepath: str) -> Dict[str, Any]:
document_service, job_service = get_services() document_service, job_service = get_services()
job = None job = None
document = None
try: try:
# Step 1: Create the document and a new job record for the document # Step 1: Create the document and a new job record for the document
document = document_service.create_document(filepath) document = document_service.create_document(filepath)
job = job_service.create_job(task_id=task_id, document_id=document.id) job = job_service.create_job(task_id=task_id, document_id=document.id)
job_service.mark_job_as_started(job_id=job.id) job_service.mark_job_as_started(job_id=job.id)
logger.info(f"Task {task_id} created for document {document.id} with file path: {filepath} and job id: {job.id}") logger.info(f"Task {task_id} created for document {document.id} from file path: {filepath} and job id: {job.id}")
logger.info(f"Job {task_id} marked as PROCESSING") logger.info(f"Task {task_id} : Creating associated PDF")
job_service.update_job_status(job_id=job.id, status=ProcessingStatus.SAVING_PDF)
document_service.create_pdf(document.id)
raw_file_hash = save_as_object(filepath) # remove the file from the watch folder
logger.info(f"Job {task_id} saved document as object: {raw_file_hash}") os.remove(filepath)
# Step 4: Create the pdf version of the document
pdf_file_hash = convert_to_pdf(filepath, raw_file_hash)
logger.info(f"Job {task_id} saved PDF with hash: {pdf_file_hash}")
# Step 3: Mark job as started
# Step 4: Create the pdf version of the document
pdf_file_path = convert_to_pdf(filepath, settings.get_temp_folder())
digest = save_as_object(pdf_file_path)
logger.info(f"Job {task_id} internal PDF file created: {digest}")
# Step x: Mark job as completed # Step x: Mark job as completed
job_service.mark_job_as_completed(job_id=job.id) job_service.mark_job_as_completed(job_id=job.id)
logger.info(f"Job {task_id} marked as COMPLETED") logger.info(f"Task {task_id} marked as COMPLETED")
return { return {
"task_id": task_id, "task_id": task_id,
@@ -99,6 +90,11 @@ def process_document(self, filepath: str) -> Dict[str, Any]:
logger.info(f"Job {task_id} marked as FAILED") logger.info(f"Job {task_id} marked as FAILED")
else: else:
logger.error(f"Failed to process {filepath}. error = {str(e)}") logger.error(f"Failed to process {filepath}. error = {str(e)}")
if document is not None:
document_service.move_to_errors(document.id, filepath)
logger.info(f"Moved file {filepath} to errors/{document.id}")
except Exception as job_error: except Exception as job_error:
logger.error(f"Failed to update job status for task {task_id}: {str(job_error)}") logger.error(f"Failed to update job status for task {task_id}: {str(job_error)}")

View File

@@ -41,15 +41,10 @@ celery_app.conf.update(
def global_init(**kwargs): def global_init(**kwargs):
"""Initialize global variables.""" """Initialize global variables."""
logger.info(f"{'*' * 20}") logger.info(f"{'*' * 45}")
logger.info(f"{'--' * 5}" + " Starting MyDocManager worker " + f"{'--' * 5}") logger.info(f"{'--' * 5}" + " Starting MyDocManager worker " + f"{'--' * 5}")
logger.info(f"{'*' * 20}") logger.info(f"{'*' * 45}")
tmp_folder = settings.get_temp_folder()
if not os.path.exists(tmp_folder):
logger.info(f"Creating temporary folder: {tmp_folder}")
os.makedirs(tmp_folder)
else:
logger.info(f"Temporary folder already exists: {os.path.abspath(tmp_folder)}")
global_init() global_init()

View File

@@ -568,3 +568,137 @@ class TestFileTypeDetection:
"""Test unsupported file type raises ValueError.""" """Test unsupported file type raises ValueError."""
with pytest.raises(ValueError, match="Unsupported file type"): with pytest.raises(ValueError, match="Unsupported file type"):
document_service._detect_file_type("/path/to/document.xyz") document_service._detect_file_type("/path/to/document.xyz")
class TestCreatePdf:
    """Tests for create_pdf method."""

    @patch('app.services.document_service.convert_to_pdf')
    @patch('app.services.document_service.magic.from_buffer')
    def test_i_can_create_pdf_successfully(
            self,
            mock_magic,
            mock_convert_to_pdf,
            document_service,
            sample_file_bytes
    ):
        """Test creating PDF from an existing document."""
        # Setup
        mock_magic.return_value = "text/plain"

        # Create a document first
        created_doc = document_service.create_document(
            "/test/test.txt",
            sample_file_bytes,
            "utf-8"
        )

        # Mock the PDF conversion
        pdf_path = os.path.join(document_service.temp_folder, "converted.pdf")
        mock_convert_to_pdf.return_value = pdf_path

        # Write a sample PDF file that the conversion would create
        pdf_content = b"This is PDF content"
        os.makedirs(os.path.dirname(pdf_path), exist_ok=True)
        with open(pdf_path, "wb") as f:
            f.write(pdf_content)

        # Execute
        result = document_service.create_pdf(created_doc.id)

        # Verify
        assert result is True

        # Verify the PDF hash was attached to the document
        # (the original test repeated this verification block twice)
        updated_doc = document_service.get_document_by_id(created_doc.id)
        pdf_hash = document_service._calculate_file_hash(pdf_content)
        assert updated_doc.pdf_file_hash == pdf_hash

        # Verify convert_to_pdf was called with correct arguments
        doc_path = document_service._get_document_path(created_doc.file_hash)
        mock_convert_to_pdf.assert_called_once_with(doc_path, document_service.temp_folder)

        # Verify content exists on disk
        validate_file_saved(document_service, pdf_hash, pdf_content)

    @patch('app.services.document_service.convert_to_pdf')
    @patch('app.services.document_service.magic.from_buffer')
    def test_i_can_reuse_existing_pdf(
            self,
            mock_magic,
            mock_convert_to_pdf,
            document_service,
            sample_file_bytes
    ):
        """Test that if PDF already exists, it doesn't recreate it."""
        # Setup
        mock_magic.return_value = "text/plain"

        # Create a document first
        created_doc = document_service.create_document(
            "/test/test.txt",
            sample_file_bytes,
            "utf-8"
        )

        # Create a fake PDF file and update the document
        pdf_content = b"This is PDF content"
        pdf_hash = document_service._calculate_file_hash(pdf_content)
        document_service.save_content_if_needed(pdf_hash, pdf_content)
        document_service.update_document(created_doc.id, {"pdf_file_hash": pdf_hash})

        # Execute
        result = document_service.create_pdf(created_doc.id)

        # Verify
        assert result is True

        # Verify convert_to_pdf was NOT called
        mock_convert_to_pdf.assert_not_called()

    def test_i_cannot_create_pdf_for_nonexistent_document(
            self,
            document_service
    ):
        """Test behavior when document ID doesn't exist."""
        # create_pdf raises ValueError for an unknown document id
        # (the original test asserted `result is False`, which contradicts
        # the implementation and would always fail)
        with pytest.raises(ValueError):
            document_service.create_pdf(ObjectId())

    @patch('app.services.document_service.magic.from_buffer')
    def test_i_cannot_create_pdf_when_file_content_missing(
            self,
            mock_magic,
            document_service,
            sample_file_bytes
    ):
        """Test behavior when file content doesn't exist."""
        # Setup
        mock_magic.return_value = "text/plain"

        # Create a document
        created_doc = document_service.create_document(
            "/test/test.txt",
            sample_file_bytes,
            "utf-8"
        )

        # Simulate missing content by removing the stored object file
        file_path = document_service._get_document_path(created_doc.file_hash)
        os.remove(file_path)

        # Execute
        result = document_service.create_pdf(created_doc.id)

        # Verify
        assert result is False

View File

@@ -418,6 +418,25 @@ class TestUpdateStatus:
assert exc_info.value.current_status == ProcessingStatus.FAILED assert exc_info.value.current_status == ProcessingStatus.FAILED
assert exc_info.value.target_status == ProcessingStatus.FAILED assert exc_info.value.target_status == ProcessingStatus.FAILED
def test_i_can_update_job_status(
        self,
        job_service,
        sample_document_id,
        sample_task_id
):
    """Test that update_job_status sets a status directly, without transition checks."""
    # Create and start a job (it ends up in PROCESSING state)
    created_job = job_service.create_job(sample_document_id, sample_task_id)
    job_service.mark_job_as_started(created_job.id)

    # Execute without error message
    result = job_service.update_job_status(created_job.id, ProcessingStatus.SAVING_OBJECT)

    # Verify status transition
    assert result is not None
    assert result.status == ProcessingStatus.SAVING_OBJECT
    assert result.error_message is None
class TestDeleteJob: class TestDeleteJob:
"""Tests for delete_job method.""" """Tests for delete_job method."""

View File

@@ -4,7 +4,7 @@ from pathlib import Path
import pytest import pytest
from tasks.common.pdf_converter import TextToPdfConverter, ImageToPdfConverter, WordToPdfConverter from app.utils.pdf_converter import TextToPdfConverter, ImageToPdfConverter, WordToPdfConverter
@pytest.fixture @pytest.fixture
@@ -20,10 +20,10 @@ def test_i_can_convert_text_to_pdf(temp_dir):
input_txt.write_text("Hello World!\nThis is a test.") input_txt.write_text("Hello World!\nThis is a test.")
converter = TextToPdfConverter(str(input_txt), output_dir=temp_dir) converter = TextToPdfConverter(str(input_txt), output_dir=temp_dir)
output_pdf = converter.convert() converter.convert()
assert Path(output_pdf).exists() assert Path(converter.output_path).exists()
assert output_pdf.endswith(".pdf") assert str(converter.output_path).endswith(".pdf")
def test_i_can_convert_image_to_pdf(temp_dir): def test_i_can_convert_image_to_pdf(temp_dir):
@@ -34,10 +34,10 @@ def test_i_can_convert_image_to_pdf(temp_dir):
image.save(input_img) image.save(input_img)
converter = ImageToPdfConverter(str(input_img), output_dir=temp_dir) converter = ImageToPdfConverter(str(input_img), output_dir=temp_dir)
output_pdf = converter.convert() converter.convert()
assert Path(output_pdf).exists() assert Path(converter.output_path).exists()
assert output_pdf.endswith(".pdf") assert str(converter.output_path).endswith(".pdf")
def test_i_can_convert_word_to_pdf(temp_dir): def test_i_can_convert_word_to_pdf(temp_dir):
@@ -49,7 +49,7 @@ def test_i_can_convert_word_to_pdf(temp_dir):
doc.save(input_docx) doc.save(input_docx)
converter = WordToPdfConverter(str(input_docx), output_dir=temp_dir) converter = WordToPdfConverter(str(input_docx), output_dir=temp_dir)
output_pdf = converter.convert() converter.convert()
assert Path(output_pdf).exists() assert Path(converter.output_path).exists()
assert output_pdf.endswith(".pdf") assert str(converter.output_path).endswith(".pdf")