From 8ae9754fdeaa8e2dca247b62ee3f5f3ef8b274f9 Mon Sep 17 00:00:00 2001 From: Kodjo Sossouvi Date: Sun, 5 Oct 2025 23:54:59 +0200 Subject: [PATCH] I can put a new file and create the associated pdf --- docker-compose.yml | 4 + src/file-processor/app/config/settings.py | 10 ++ .../repositories/document_repository.py | 41 ++++++ src/file-processor/app/file_watcher.py | 3 +- src/file-processor/app/models/document.py | 1 + .../app/services/document_service.py | 120 +++++++++++++++- .../app/services/job_service.py | 11 +- .../app/utils}/pdf_converter.py | 0 src/worker/tasks/common/converter_utils.py | 7 + src/worker/tasks/document_processing.py | 40 +++--- src/worker/tasks/main.py | 11 +- tests/services/test_document_service.py | 134 ++++++++++++++++++ tests/services/test_job_service.py | 19 +++ tests/{common => utils}/test_pdf_converter.py | 20 +-- 14 files changed, 376 insertions(+), 45 deletions(-) rename src/{worker/tasks/common => file-processor/app/utils}/pdf_converter.py (100%) rename tests/{common => utils}/test_pdf_converter.py (66%) diff --git a/docker-compose.yml b/docker-compose.yml index a702201..170967b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -40,6 +40,8 @@ services: - ./src/worker/tasks:/app/tasks # <- Added: shared access to worker tasks - ./volumes/watched_files:/watched_files - ./volumes/objects:/objects + - ./volumes/errors:/errors + - ./volumes/ignored:/ignored depends_on: - redis - mongodb @@ -62,6 +64,8 @@ services: - ./src/file-processor/app:/app/app # <- Added: shared access file-processor app - ./volumes/watched_files:/watched_files - ./volumes/objects:/objects + - ./volumes/errors:/errors + - ./volumes/ignored:/ignored depends_on: - redis - mongodb diff --git a/src/file-processor/app/config/settings.py b/src/file-processor/app/config/settings.py index 94cf48a..ab29e0c 100644 --- a/src/file-processor/app/config/settings.py +++ b/src/file-processor/app/config/settings.py @@ -106,3 +106,13 @@ def get_watch_folder() -> str: def 
get_temp_folder() -> str: """Directory to store temporary files""" return os.getenv("TEMP_DIRECTORY", "/tmp") + + +def get_errors_folder() -> str: + """Directory to store files that failed processing""" + return os.getenv("ERRORS_DIRECTORY", "/errors") + + +def get_ignored_folder() -> str: + """Directory to store ignored (unsupported or duplicate) files""" + return os.getenv("IGNORED_DIRECTORY", "/ignored") diff --git a/src/file-processor/app/database/repositories/document_repository.py b/src/file-processor/app/database/repositories/document_repository.py index 0d8b6dc..e0e28c3 100644 --- a/src/file-processor/app/database/repositories/document_repository.py +++ b/src/file-processor/app/database/repositories/document_repository.py @@ -130,6 +130,47 @@ class FileDocumentRepository: except PyMongoError: return None + def find_document_with_pdf_hash(self, file_hash: str) -> Optional[FileDocument]: + """ + Find file document by file hash with a pdf_file_hash set (not None). + + Args: + file_hash (str): SHA256 hash of file content + + Returns: + FileDocument or None: File document if found, None otherwise + """ + try: + file_doc = self.collection.find_one({"file_hash": file_hash, + "pdf_file_hash": {"$ne": None}}) + if file_doc: + return FileDocument(**file_doc) + return None + + except PyMongoError: + return None + + def find_same_document(self, filename: str, file_hash: str) -> Optional[FileDocument]: + """ + Find document with the same file_name and the same file hash + + Args: + filename (str): Name of the file + file_hash (str): SHA256 hash of file content + + Returns: + FileDocument or None: File document if found, None otherwise + """ + try: + file_doc = self.collection.find_one({"file_hash": file_hash, + "filename": filename}) + if file_doc: + return FileDocument(**file_doc) + return None + + except PyMongoError: + return None + def find_document_by_filepath(self, filepath: str) -> Optional[FileDocument]: """ Find file document by exact filepath. 
diff --git a/src/file-processor/app/file_watcher.py b/src/file-processor/app/file_watcher.py index f3b9eb0..821dc59 100644 --- a/src/file-processor/app/file_watcher.py +++ b/src/file-processor/app/file_watcher.py @@ -30,7 +30,7 @@ class DocumentFileEventHandler(FileSystemEventHandler): dispatching Celery tasks, and managing processing jobs. """ - SUPPORTED_EXTENSIONS = {'.txt', '.pdf', '.docx'} + SUPPORTED_EXTENSIONS = {'.txt', '.pdf', '.docx', '.jpg', '.png', '.jpeg'} def __init__(self, document_service: DocumentService, job_service: JobService): """ @@ -59,6 +59,7 @@ class DocumentFileEventHandler(FileSystemEventHandler): if file_extension not in self.SUPPORTED_EXTENSIONS: logger.info(f"Ignoring unsupported file type: {filepath}") + self.document_service.move_to_ignored(filepath, "unsupported file type") return logger.info(f"Processing new file: {filepath}") diff --git a/src/file-processor/app/models/document.py b/src/file-processor/app/models/document.py index 19d9bfe..105bffd 100644 --- a/src/file-processor/app/models/document.py +++ b/src/file-processor/app/models/document.py @@ -49,6 +49,7 @@ class FileDocument(BaseModel): metadata: Dict[str, Any] = Field(default_factory=dict, description="File-specific metadata") detected_at: Optional[datetime] = Field(default=None, description="Timestamp when file was detected") file_hash: Optional[str] = Field(default=None, description="SHA256 hash of file content") + pdf_file_hash: Optional[str] = Field(default=None, description="SHA256 hash of the associated pdf file content") encoding: str = Field(default="utf-8", description="Character encoding for text files") file_size: int = Field(..., ge=0, description="File size in bytes") mime_type: str = Field(..., description="MIME type detected") diff --git a/src/file-processor/app/services/document_service.py b/src/file-processor/app/services/document_service.py index 9b30c8e..f20dc90 100644 --- a/src/file-processor/app/services/document_service.py +++ 
b/src/file-processor/app/services/document_service.py @@ -6,7 +6,9 @@ while maintaining data consistency through MongoDB transactions. """ import hashlib +import logging import os +import shutil from datetime import datetime from pathlib import Path from typing import List, Optional, Dict, Any @@ -14,13 +16,16 @@ from typing import List, Optional, Dict, Any import magic from pymongo.errors import PyMongoError -from app.config.settings import get_objects_folder +from app.config.settings import get_objects_folder, get_temp_folder, get_errors_folder, get_ignored_folder from app.database.repositories.document_repository import FileDocumentRepository from app.models.document import ( FileDocument, FileType, ) from app.models.types import PyObjectId +from app.utils.pdf_converter import convert_to_pdf + +logger = logging.getLogger(__name__) class DocumentService: @@ -31,7 +36,11 @@ class DocumentService: and their content while ensuring data consistency through transactions. """ - def __init__(self, database, objects_folder: str = None): + def __init__(self, database, + objects_folder: str = None, + temp_folder: str = None, + errors_folder: str = None, + ignored_folder: str = None): """ Initialize the document service with repository dependencies. @@ -43,6 +52,9 @@ class DocumentService: self.db = database self.document_repository = FileDocumentRepository(self.db) self.objects_folder = objects_folder or get_objects_folder() + self.temp_folder = temp_folder or get_temp_folder() + self.errors_folder = errors_folder or get_errors_folder() + self.ignored_folder = ignored_folder or get_ignored_folder() def initialize(self): self.document_repository.initialize() @@ -117,6 +129,39 @@ class DocumentService: return path.read_bytes() + @staticmethod + def _get_safe_path(file_path): + """ + If the path already exists, add a suffix to the filename. + Increment the suffix until a safe path is found. 
+ :param file_path: + :return: + """ + path = Path(file_path) + + # If the path doesn't exist, return it as is + if not path.exists(): + return file_path + + # Split the filename and extension + stem = path.stem + suffix = path.suffix + directory = path.parent + + # Try incrementing numbers until a unique path is found + counter = 1 + while True: + # Create new filename with counter + new_filename = f"{stem}_{counter}{suffix}" + new_path = os.path.join(directory, new_filename) + + # Check if this new path exists + if not os.path.exists(new_path): + return new_path + + # Increment counter for next attempt + counter += 1 + def _get_document_path(self, file_hash): """ @@ -125,6 +170,9 @@ class DocumentService: """ return os.path.join(self.objects_folder, file_hash[:24], file_hash) + def exists(self, file_hash): + return os.path.exists(self._get_document_path(file_hash)) + def save_content_if_needed(self, file_hash, content: bytes): target_path = self._get_document_path(file_hash) if os.path.exists(target_path): @@ -136,6 +184,18 @@ class DocumentService: with open(target_path, "wb") as f: f.write(content) + def move_to_errors(self, document_id, file_path): + logger.info(f"Moving file {file_path} to error folder") + error_file_name = f"{document_id}_{os.path.basename(file_path)}" + error_file_path = self._get_safe_path(os.path.join(self.errors_folder, error_file_name)) + shutil.move(file_path, error_file_path) + + def move_to_ignored(self, file_path, reason="Unknown"): + logger.info(f"Moving file {file_path} to ignored folder") + ignored_file_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + f"_### {reason} ###_" + os.path.basename(file_path) + ignored_file_path = self._get_safe_path(os.path.join(self.ignored_folder, ignored_file_name)) + shutil.move(file_path, ignored_file_path) + def create_document( self, file_path: str, @@ -171,7 +231,15 @@ class DocumentService: detected_at = datetime.now() try: + logger.info(f"Creating Document for {file_path}") + # Skip the 
document if it already exists + same_document = self.document_repository.find_same_document(filename, file_hash) + if same_document is not None: + logger.info(f" Document with same hash already exists. Skipping...") + self.move_to_ignored(file_path, f"already exists ({same_document.id})") + # NOTE(review): return here, otherwise the already-moved file's content is re-saved and a duplicate record is inserted below + return same_document + self.save_content_if_needed(file_hash, file_bytes) + logger.info(f" Saved content to {self._get_document_path(file_hash)}") # Create FileDocument file_data = FileDocument( @@ -188,6 +256,7 @@ ) created_file = self.document_repository.create_document(file_data) + logger.info(f" Created document with id '{created_file.id}'") return created_file @@ -195,6 +264,50 @@ # Transaction will automatically rollback if supported raise PyMongoError(f"Failed to create document: {str(e)}") + def create_pdf(self, document_id: PyObjectId): + """ + For all files, a controlled pdf version will be created for standard visualization and action + :return: + """ + logger.info(f"Creating PDF document for {document_id}") + document = self.get_document_by_id(document_id) + if document is None: + logger.error(f" Document not found") + raise ValueError(f"Document {document_id} not found") + + # try to find another document that has the same hash + document_with_same_hash = self.get_document_with_pdf_hash(document.file_hash) + + # the pdf will be created only if it does not exist yet + if (document_with_same_hash is not None and + document_with_same_hash.pdf_file_hash and + self.exists(document_with_same_hash.pdf_file_hash)): + logger.info(f"Found document with same hash. Will use pdf {document_with_same_hash.pdf_file_hash}") + self.update_document(document_id, {"pdf_file_hash": document_with_same_hash.pdf_file_hash}) + return True + + # get the content of the file + logger.info(f" No document with same hash found and valid pdf found. 
Will create new pdf") + file_bytes = self.get_document_content_by_hash(document.file_hash) + if file_bytes is None: + logger.error(f"Content for document {document_id} not found. hash = {document.file_hash}.") + return False + + # create the pdf file + temp_pdf_file = convert_to_pdf(self._get_document_path(document.file_hash), self.temp_folder) + pdf_file_hash = self._calculate_file_hash(self._read_file_bytes(temp_pdf_file)) + self.save_content_if_needed(pdf_file_hash, self._read_file_bytes(temp_pdf_file)) + logger.info(f" Created new pdf file with hash {pdf_file_hash}") + + # remove the temporary file + os.remove(temp_pdf_file) + logger.info(f" Removed temporary pdf file {temp_pdf_file}") + + # update the document + self.update_document(document_id, {"pdf_file_hash": pdf_file_hash}) + + return True + def get_document_by_id(self, document_id: PyObjectId) -> Optional[FileDocument]: """ Retrieve a document by its ID. @@ -219,6 +332,9 @@ class DocumentService: """ return self.document_repository.find_document_by_hash(file_hash) + def get_document_with_pdf_hash(self, file_hash) -> Optional[FileDocument]: + return self.document_repository.find_document_with_pdf_hash(file_hash) + def get_document_by_filepath(self, filepath: str) -> Optional[FileDocument]: """ Retrieve a document by its file path. 
diff --git a/src/file-processor/app/services/job_service.py b/src/file-processor/app/services/job_service.py index ff55c6d..d7b4bc1 100644 --- a/src/file-processor/app/services/job_service.py +++ b/src/file-processor/app/services/job_service.py @@ -111,7 +111,9 @@ class JobService: current_job = self.repository.find_job_by_id(job_id) # Validate status transition - if current_job.status != ProcessingStatus.PROCESSING: + if current_job.status in (ProcessingStatus.PENDING, + ProcessingStatus.COMPLETED, + ProcessingStatus.FAILED): raise InvalidStatusTransitionError(current_job.status, ProcessingStatus.COMPLETED) # Update status @@ -141,7 +143,7 @@ class JobService: current_job = self.repository.find_job_by_id(job_id) # Validate status transition - if current_job.status != ProcessingStatus.PROCESSING: + if current_job.status in (ProcessingStatus.PENDING, ProcessingStatus.COMPLETED, ProcessingStatus.FAILED): raise InvalidStatusTransitionError(current_job.status, ProcessingStatus.FAILED) # Update status with error message @@ -151,6 +153,11 @@ class JobService: error_message ) + def update_job_status(self, job_id: PyObjectId, + status: ProcessingStatus, + error_message: str = None) -> ProcessingJob: + return self.repository.update_job_status(job_id, status, error_message) + def delete_job(self, job_id: PyObjectId) -> bool: """ Delete a job from the database. diff --git a/src/worker/tasks/common/pdf_converter.py b/src/file-processor/app/utils/pdf_converter.py similarity index 100% rename from src/worker/tasks/common/pdf_converter.py rename to src/file-processor/app/utils/pdf_converter.py diff --git a/src/worker/tasks/common/converter_utils.py b/src/worker/tasks/common/converter_utils.py index 61f65b2..e85e300 100644 --- a/src/worker/tasks/common/converter_utils.py +++ b/src/worker/tasks/common/converter_utils.py @@ -20,12 +20,19 @@ def detect_file_type(file_path: str) -> str: UnsupportedFileTypeError: If file type is not supported. 
""" mime = magic.from_file(file_path, mime=True) + extension = Path(file_path).suffix if mime.startswith("text/"): return "text" elif mime.startswith("image/"): return "image" elif mime in ("application/vnd.openxmlformats-officedocument.wordprocessingml.document",): return "word" + elif mime == "application/pdf": + return "pdf" + elif mime == "application/vnd.ms-powerpoint": + return "powerpoint" + elif mime == "application/octet-stream" and extension in (".jpg", ".jpeg", ".png", ".gif"): + return "image" else: raise UnsupportedFileTypeError(f"Unsupported file type: {mime}") diff --git a/src/worker/tasks/document_processing.py b/src/worker/tasks/document_processing.py index 6524901..b19d5fd 100644 --- a/src/worker/tasks/document_processing.py +++ b/src/worker/tasks/document_processing.py @@ -6,14 +6,14 @@ and update processing job statuses throughout the task lifecycle. """ import logging +import os from typing import Any, Dict from app.config import settings from app.database.connection import get_database +from app.models.job import ProcessingStatus from app.services.document_service import DocumentService from app.services.job_service import JobService -from tasks.common.document_utils import save_as_object -from tasks.common.pdf_converter import convert_to_pdf from tasks.main import celery_app logger = logging.getLogger(__name__) @@ -26,7 +26,8 @@ def get_services(): return document_service, job_service -@celery_app.task(bind=True, autoretry_for=(Exception,), retry_kwargs={'max_retries': 3, 'countdown': 60}) +#@celery_app.task(bind=True, autoretry_for=(Exception,), retry_kwargs={'max_retries': 3, 'countdown': 60}) +@celery_app.task(bind=True) def process_document(self, filepath: str) -> Dict[str, Any]: """ Process a document file and extract its content. 
@@ -46,41 +47,31 @@ def process_document(self, filepath: str) -> Dict[str, Any]: Raises: Exception: Any processing error (will trigger retry) """ - task_id = self.request.id + task_id = self.request.id logger.info(f"Starting document processing task {task_id} for file: {filepath}") # get services document_service, job_service = get_services() job = None + document = None try: # Step 1: Create the document and a new job record for the document document = document_service.create_document(filepath) job = job_service.create_job(task_id=task_id, document_id=document.id) job_service.mark_job_as_started(job_id=job.id) - logger.info(f"Task {task_id} created for document {document.id} with file path: {filepath} and job id: {job.id}") + logger.info(f"Task {task_id} created for document {document.id} from file path: {filepath} and job id: {job.id}") - logger.info(f"Job {task_id} marked as PROCESSING") - - raw_file_hash = save_as_object(filepath) - logger.info(f"Job {task_id} saved document as object: {raw_file_hash}") + logger.info(f"Task {task_id} : Creating associated PDF") + job_service.update_job_status(job_id=job.id, status=ProcessingStatus.SAVING_PDF) + document_service.create_pdf(document.id) - # Step 4: Create the pdf version of the document - pdf_file_hash = convert_to_pdf(filepath, raw_file_hash) - logger.info(f"Job {task_id} saved PDF with hash: {pdf_file_hash}") - - - - # Step 3: Mark job as started - - # Step 4: Create the pdf version of the document - pdf_file_path = convert_to_pdf(filepath, settings.get_temp_folder()) - digest = save_as_object(pdf_file_path) - logger.info(f"Job {task_id} internal PDF file created: {digest}") + # remove the file from the watch folder + os.remove(filepath) # Step x: Mark job as completed job_service.mark_job_as_completed(job_id=job.id) - logger.info(f"Job {task_id} marked as COMPLETED") + logger.info(f"Task {task_id} marked as COMPLETED") return { "task_id": task_id, @@ -99,6 +90,11 @@ def process_document(self, filepath: str) -> 
Dict[str, Any]: logger.info(f"Job {task_id} marked as FAILED") else: logger.error(f"Failed to process {filepath}. error = {str(e)}") + + if document is not None: + document_service.move_to_errors(document.id, filepath) + logger.info(f"Moved file {filepath} to errors/{document.id}") + except Exception as job_error: logger.error(f"Failed to update job status for task {task_id}: {str(job_error)}") diff --git a/src/worker/tasks/main.py b/src/worker/tasks/main.py index ada5228..97f4661 100644 --- a/src/worker/tasks/main.py +++ b/src/worker/tasks/main.py @@ -41,15 +41,10 @@ celery_app.conf.update( def global_init(**kwargs): """Initialize global variables.""" - logger.info(f"{'*' * 20}") + logger.info(f"{'*' * 45}") logger.info(f"{'--' * 5}" + " Starting MyDocManager worker " + f"{'--' * 5}") - logger.info(f"{'*' * 20}") - tmp_folder = settings.get_temp_folder() - if not os.path.exists(tmp_folder): - logger.info(f"Creating temporary folder: {tmp_folder}") - os.makedirs(tmp_folder) - else: - logger.info(f"Temporary folder already exists: {os.path.abspath(tmp_folder)}") + logger.info(f"{'*' * 45}") + global_init() diff --git a/tests/services/test_document_service.py b/tests/services/test_document_service.py index 5ca9867..80fb157 100644 --- a/tests/services/test_document_service.py +++ b/tests/services/test_document_service.py @@ -568,3 +568,137 @@ class TestFileTypeDetection: """Test unsupported file type raises ValueError.""" with pytest.raises(ValueError, match="Unsupported file type"): document_service._detect_file_type("/path/to/document.xyz") + + +class TestCreatePdf: + """Tests for create_pdf method.""" + + @patch('app.services.document_service.convert_to_pdf') + @patch('app.services.document_service.magic.from_buffer') + def test_i_can_create_pdf_successfully( + self, + mock_magic, + mock_convert_to_pdf, + document_service, + sample_file_bytes + ): + """Test creating PDF from an existing document.""" + # Setup + mock_magic.return_value = "text/plain" + + # Create a 
document first + created_doc = document_service.create_document( + "/test/test.txt", + sample_file_bytes, + "utf-8" + ) + + # Mock the PDF conversion + pdf_path = os.path.join(document_service.temp_folder, "converted.pdf") + mock_convert_to_pdf.return_value = pdf_path + + # Write a sample PDF file that the conversion would create + pdf_content = b"This is PDF content" + os.makedirs(os.path.dirname(pdf_path), exist_ok=True) + with open(pdf_path, "wb") as f: + f.write(pdf_content) + + # Execute + result = document_service.create_pdf(created_doc.id) + + # Verify + assert result is True + + # Get the updated document + updated_doc = document_service.get_document_by_id(created_doc.id) + assert updated_doc.pdf_file_hash is not None + + # Verify the PDF content was saved + pdf_hash = document_service._calculate_file_hash(pdf_content) + assert updated_doc.pdf_file_hash == pdf_hash + + # Verify convert_to_pdf was called with correct arguments + doc_path = document_service._get_document_path(created_doc.file_hash) + mock_convert_to_pdf.assert_called_once_with(doc_path, document_service.temp_folder) + + # Verify content exists on disk + validate_file_saved(document_service, pdf_hash, pdf_content) + + # Verify PDF hash was added to document + updated_doc = document_service.get_document_by_id(created_doc.id) + pdf_hash = document_service._calculate_file_hash(pdf_content) + assert updated_doc.pdf_file_hash == pdf_hash + + @patch('app.services.document_service.convert_to_pdf') + @patch('app.services.document_service.magic.from_buffer') + def test_i_can_reuse_existing_pdf( + self, + mock_magic, + mock_convert_to_pdf, + document_service, + sample_file_bytes + ): + """Test that if PDF already exists, it doesn't recreate it.""" + # Setup + mock_magic.return_value = "text/plain" + + # Create a document first + created_doc = document_service.create_document( + "/test/test.txt", + sample_file_bytes, + "utf-8" + ) + + # Create a fake PDF file and update the document + pdf_content = 
b"This is PDF content" + pdf_hash = document_service._calculate_file_hash(pdf_content) + document_service.save_content_if_needed(pdf_hash, pdf_content) + document_service.update_document(created_doc.id, {"pdf_file_hash": pdf_hash}) + + # Execute + result = document_service.create_pdf(created_doc.id) + + # Verify + assert result is True + + # Verify convert_to_pdf was NOT called + mock_convert_to_pdf.assert_not_called() + + def test_i_cannot_create_pdf_for_nonexistent_document( + self, + document_service + ): + """Test behavior when document ID doesn't exist.""" + # Execute with random ObjectId: create_pdf raises ValueError for unknown documents + with pytest.raises(ValueError): + document_service.create_pdf(ObjectId()) + + @patch('app.services.document_service.magic.from_buffer') + def test_i_cannot_create_pdf_when_file_content_missing( + self, + mock_magic, + document_service, + sample_file_bytes + ): + """Test behavior when file content doesn't exist.""" + # Setup + mock_magic.return_value = "text/plain" + + # Create a document + created_doc = document_service.create_document( + "/test/test.txt", + sample_file_bytes, + "utf-8" + ) + + # Simulate missing content by removing file + file_path = document_service._get_document_path(created_doc.file_hash) + os.remove(file_path) + + # Execute + result = document_service.create_pdf(created_doc.id) + + # Verify + assert result is False diff --git a/tests/services/test_job_service.py b/tests/services/test_job_service.py index 5307ab9..c404470 100644 --- a/tests/services/test_job_service.py +++ b/tests/services/test_job_service.py @@ -417,6 +417,25 @@ class TestUpdateStatus: # Verify exception details assert exc_info.value.current_status == ProcessingStatus.FAILED assert exc_info.value.target_status == ProcessingStatus.FAILED + + def test_i_can_update_job_status( + self, + job_service, + sample_document_id, + sample_task_id + ): + """Test that a job's status can be updated directly to an arbitrary value.""" + # Create and start a job + created_job = 
job_service.create_job(sample_document_id, sample_task_id) + job_service.mark_job_as_started(created_job.id) + + # Execute without error message + result = job_service.update_job_status(created_job.id, ProcessingStatus.SAVING_OBJECT) + + # Verify status transition + assert result is not None + assert result.status == ProcessingStatus.SAVING_OBJECT + assert result.error_message is None class TestDeleteJob: diff --git a/tests/common/test_pdf_converter.py b/tests/utils/test_pdf_converter.py similarity index 66% rename from tests/common/test_pdf_converter.py rename to tests/utils/test_pdf_converter.py index a5718fd..04a56a6 100644 --- a/tests/common/test_pdf_converter.py +++ b/tests/utils/test_pdf_converter.py @@ -4,7 +4,7 @@ from pathlib import Path import pytest -from tasks.common.pdf_converter import TextToPdfConverter, ImageToPdfConverter, WordToPdfConverter +from app.utils.pdf_converter import TextToPdfConverter, ImageToPdfConverter, WordToPdfConverter @pytest.fixture @@ -20,10 +20,10 @@ def test_i_can_convert_text_to_pdf(temp_dir): input_txt.write_text("Hello World!\nThis is a test.") converter = TextToPdfConverter(str(input_txt), output_dir=temp_dir) - output_pdf = converter.convert() + converter.convert() - assert Path(output_pdf).exists() - assert output_pdf.endswith(".pdf") + assert Path(converter.output_path).exists() + assert str(converter.output_path).endswith(".pdf") def test_i_can_convert_image_to_pdf(temp_dir): @@ -34,10 +34,10 @@ def test_i_can_convert_image_to_pdf(temp_dir): image.save(input_img) converter = ImageToPdfConverter(str(input_img), output_dir=temp_dir) - output_pdf = converter.convert() + converter.convert() - assert Path(output_pdf).exists() - assert output_pdf.endswith(".pdf") + assert Path(converter.output_path).exists() + assert str(converter.output_path).endswith(".pdf") def test_i_can_convert_word_to_pdf(temp_dir): @@ -49,7 +49,7 @@ def test_i_can_convert_word_to_pdf(temp_dir): doc.save(input_docx) converter = 
WordToPdfConverter(str(input_docx), output_dir=temp_dir) - output_pdf = converter.convert() + converter.convert() - assert Path(output_pdf).exists() - assert output_pdf.endswith(".pdf") + assert Path(converter.output_path).exists() + assert str(converter.output_path).endswith(".pdf")