From 78181e71be2bd3f84ea7b14bb2a66cdc6652c6d7 Mon Sep 17 00:00:00 2001 From: Kodjo Sossouvi Date: Mon, 29 Sep 2025 23:04:48 +0200 Subject: [PATCH] Added Login + Working on pdf creation --- requirements.txt | 6 + src/file-processor/Dockerfile | 3 + src/file-processor/app/config/settings.py | 23 +--- src/file-processor/app/file_watcher.py | 18 ++- src/file-processor/app/main.py | 6 +- src/file-processor/requirements.txt | 3 + src/frontend/package-lock.json | 10 ++ src/frontend/package.json | 1 + src/frontend/src/assets/icons.jsx | 35 +++++ .../src/components/auth/AuthLayout.jsx | 16 ++- .../src/components/auth/LoginForm.jsx | 1 - src/frontend/src/components/common/Header.jsx | 45 ++----- src/frontend/src/components/common/Layout.jsx | 10 +- src/frontend/src/components/common/Menu.jsx | 16 +++ .../src/components/common/ThemeSwither.jsx | 120 ++++-------------- src/worker/Dockerfile | 3 + src/worker/requirements.txt | 7 +- src/worker/tasks/common/__init__.py | 0 src/worker/tasks/common/converter_utils.py | 102 +++++++++++++++ src/worker/tasks/common/pdf_converter.py | 83 ++++++++++++ src/worker/tasks/document_processing.py | 22 +++- src/worker/tasks/main.py | 20 ++- tests/common/__init__.py | 0 tests/common/test_pdf_converter.py | 55 ++++++++ tests/common/test_utils.py | 52 ++++++++ 25 files changed, 481 insertions(+), 176 deletions(-) create mode 100644 src/frontend/src/assets/icons.jsx create mode 100644 src/frontend/src/components/common/Menu.jsx create mode 100644 src/worker/tasks/common/__init__.py create mode 100644 src/worker/tasks/common/converter_utils.py create mode 100644 src/worker/tasks/common/pdf_converter.py create mode 100644 tests/common/__init__.py create mode 100644 tests/common/test_pdf_converter.py create mode 100644 tests/common/test_utils.py diff --git a/requirements.txt b/requirements.txt index 0bf5cc1..de1f41e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,6 +7,7 @@ billiard==4.2.1 celery==5.5.3 certifi==2025.8.3 cffi==2.0.0 +charset-normalizer==3.4.3 click==8.2.1 click-didyoumean==0.3.1 click-plugins==1.1.1.2 @@ -26,10 +27,12 @@ importlib_metadata==8.7.0 iniconfig==2.1.0 izulu==0.50.0 kombu==5.5.4 +lxml==6.0.2 mongomock==4.3.0 mongomock-motor==0.0.36 motor==3.7.1 packaging==25.0 +pillow==11.3.0 pipdeptree==2.28.0 pluggy==1.6.0 prompt_toolkit==3.0.52 @@ -41,16 +44,19 @@ pydantic_core==2.33.2 Pygments==2.19.2 PyJWT==2.10.1 pymongo==4.15.1 +pypandoc==1.15 pytest==8.4.2 pytest-asyncio==1.2.0 pytest-mock==3.15.1 python-dateutil==2.9.0.post0 +python-docx==1.2.0 python-dotenv==1.1.1 python-magic==0.4.27 python-multipart==0.0.20 pytz==2025.2 PyYAML==6.0.2 redis==6.4.0 +reportlab==4.4.4 rsa==4.9.1 sentinels==1.1.1 six==1.17.0 diff --git a/src/file-processor/Dockerfile b/src/file-processor/Dockerfile index 434b3cf..a0b7b63 100644 --- a/src/file-processor/Dockerfile +++ b/src/file-processor/Dockerfile @@ -7,6 +7,9 @@ WORKDIR /app RUN apt-get update && apt-get install -y --no-install-recommends \ libmagic1 \ file \ + pandoc \ + ghostscript \ + texlive-xetex \ && rm -rf /var/lib/apt/lists/* # Copy requirements and install dependencies diff --git a/src/file-processor/app/config/settings.py b/src/file-processor/app/config/settings.py index 81fdb1c..8a737f1 100644 --- a/src/file-processor/app/config/settings.py +++ b/src/file-processor/app/config/settings.py @@ -34,22 +34,6 @@ def get_redis_url() -> str: return os.getenv("REDIS_URL", "redis://localhost:6379/0") -# def get_redis_host() -> str: -# redis_url = get_redis_url() -# if redis_url.startswith("redis://"): -# return redis_url.split("redis://")[1].split("/")[0] -# else: -# return redis_url -# -# -# def get_redis_port() -> int: -# redis_url = get_redis_url() -# if redis_url.startswith("redis://"): -# return int(redis_url.split("redis://")[1].split("/")[0].split(":")[1]) -# else: -# return int(redis_url.split(":")[1]) - - def get_jwt_secret_key() -> str: """ Get JWT secret key from environment variables. @@ -114,6 +98,11 @@ def get_objects_folder() -> str: return os.getenv("OBJECTS_FOLDER", "/objects") -def watch_directory() -> str: +def get_watch_folder() -> str: """Directory to monitor for new files""" return os.getenv("WATCH_DIRECTORY", "/watched_files") + + +def get_temp_folder() -> str: + """Directory to store temporary files""" + return os.getenv("TEMP_DIRECTORY", "/temp") diff --git a/src/file-processor/app/file_watcher.py b/src/file-processor/app/file_watcher.py index 4bc5c72..f3b9eb0 100644 --- a/src/file-processor/app/file_watcher.py +++ b/src/file-processor/app/file_watcher.py @@ -63,17 +63,15 @@ class DocumentFileEventHandler(FileSystemEventHandler): logger.info(f"Processing new file: {filepath}") - # try: - from tasks.document_processing import process_document - task_result = process_document.delay(filepath) - print(task_result) - print("hello world") - # task_id = task_result.task_id - # logger.info(f"Dispatched Celery task with ID: {task_id}") + try: + from tasks.document_processing import process_document + task_result = process_document.delay(filepath) + task_id = task_result.task_id + logger.info(f"Dispatched Celery task with ID: {task_id}") - # except Exception as e: - # logger.error(f"Failed to process file {filepath}: {str(e)}") - # # Note: We don't re-raise the exception to keep the watcher running + except Exception as e: + logger.error(f"Failed to process file {filepath}: {str(e)}") + # Note: We don't re-raise the exception to keep the watcher running class FileWatcher: diff --git a/src/file-processor/app/main.py b/src/file-processor/app/main.py index 5bc661b..b35825a 100644 --- a/src/file-processor/app/main.py +++ b/src/file-processor/app/main.py @@ -65,12 +65,12 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]: # Create and start file watcher file_watcher = create_file_watcher( - watch_directory=settings.watch_directory(), + watch_directory=settings.get_watch_folder(), document_service=document_service, job_service=job_service ) file_watcher.start() - logger.info(f"FileWatcher started for directory: {settings.watch_directory()}") + logger.info(f"FileWatcher started for directory: {settings.get_watch_folder()}") logger.info("Application startup completed successfully") @@ -102,7 +102,7 @@ app = FastAPI( # Configure CORS app.add_middleware( CORSMiddleware, - allow_origins=["http://localhost:5173"], # React frontend + allow_origins=["http://localhost:5173", "http://localhost:5174"], # React frontend allow_credentials=True, allow_methods=["*"], allow_headers=["*"], diff --git a/src/file-processor/requirements.txt b/src/file-processor/requirements.txt index 2e0c05d..6ad69ce 100644 --- a/src/file-processor/requirements.txt +++ b/src/file-processor/requirements.txt @@ -5,11 +5,14 @@ email-validator==2.3.0 fastapi==0.116.1 httptools==0.6.4 motor==3.7.1 +pillow==11.3.0 pydantic==2.11.9 PyJWT==2.10.1 pymongo==4.15.0 +pypandoc==1.15 python-multipart==0.0.20 redis==6.4.0 +reportlab==4.4.4 uvicorn==0.35.0 python-magic==0.4.27 watchdog==6.0.0 \ No newline at end of file diff --git a/src/frontend/package-lock.json b/src/frontend/package-lock.json index 64910b5..36930c1 100644 --- a/src/frontend/package-lock.json +++ b/src/frontend/package-lock.json @@ -12,6 +12,7 @@ "axios": "^1.12.2", "react": "^19.1.1", "react-dom": "^19.1.1", + "react-icons": "^5.5.0", "react-router-dom": "^7.9.3" }, "devDependencies": { @@ -3377,6 +3378,15 @@ "react": "^19.1.1" } }, + "node_modules/react-icons": { + "version": "5.5.0", + "resolved": "https://registry.npmjs.org/react-icons/-/react-icons-5.5.0.tgz", + "integrity": "sha512-MEFcXdkP3dLo8uumGI5xN3lDFNsRtrjbOEKDLD7yv76v4wpnEq2Lt2qeHaQOr34I/wPN3s3+N08WkQ+CW37Xiw==", + "license": "MIT", + "peerDependencies": { + "react": "*" + } + }, "node_modules/react-refresh": { "version": "0.17.0", "resolved": "https://registry.npmjs.org/react-refresh/-/react-refresh-0.17.0.tgz", diff --git a/src/frontend/package.json b/src/frontend/package.json index 8dbd964..9a91f1a 100644 --- a/src/frontend/package.json +++ b/src/frontend/package.json @@ -14,6 +14,7 @@ "axios": "^1.12.2", "react": "^19.1.1", "react-dom": "^19.1.1", + "react-icons": "^5.5.0", "react-router-dom": "^7.9.3" }, "devDependencies": { diff --git a/src/frontend/src/assets/icons.jsx b/src/frontend/src/assets/icons.jsx new file mode 100644 index 0000000..cb676ec --- /dev/null +++ b/src/frontend/src/assets/icons.jsx @@ -0,0 +1,35 @@ +// src/assets/icons.jsx + +export const SunIcon = ( + + + +); + +export const MoonIcon = ( + + + +); diff --git a/src/frontend/src/components/auth/AuthLayout.jsx b/src/frontend/src/components/auth/AuthLayout.jsx index 30cd85e..f793246 100644 --- a/src/frontend/src/components/auth/AuthLayout.jsx +++ b/src/frontend/src/components/auth/AuthLayout.jsx @@ -8,13 +8,27 @@ import ThemeSwitcher from "../common/ThemeSwither.jsx"; * @param {Object} props - Component props * @param {React.ReactNode} props.children - Child components to render */ + +const AuthHeader = () => { + return ( +
+
+

MyDocManager

+
+
+ +
+
+ ) +} + function AuthLayout({children}) { return (
+ {/* Main container with flex centering */}
{/* Content wrapper for responsive spacing */} -
{children}
diff --git a/src/frontend/src/components/auth/LoginForm.jsx b/src/frontend/src/components/auth/LoginForm.jsx index 7614015..ada7875 100644 --- a/src/frontend/src/components/auth/LoginForm.jsx +++ b/src/frontend/src/components/auth/LoginForm.jsx @@ -96,7 +96,6 @@ function LoginForm() {
{/* Card Header */}
-

My Documents DocManager

Sign in to your account

diff --git a/src/frontend/src/components/common/Header.jsx b/src/frontend/src/components/common/Header.jsx index 24b8b46..436c874 100644 --- a/src/frontend/src/components/common/Header.jsx +++ b/src/frontend/src/components/common/Header.jsx @@ -1,8 +1,10 @@ -import { useAuth } from '../../hooks/useAuth'; -import { useNavigate } from 'react-router-dom'; +import {useAuth} from '../../hooks/useAuth'; +import {useNavigate} from 'react-router-dom'; +import ThemeSwitcher from "./ThemeSwither.jsx"; +import React from "react"; const Header = () => { - const { user, logout } = useAuth(); + const {user, logout} = useAuth(); const navigate = useNavigate(); const handleLogout = async () => { @@ -13,35 +15,8 @@ const Header = () => { return (
-
-
- - - -
- -
- - MyDocManager - +

MyDocManager

- -
- -
-
@@ -59,12 +34,16 @@ const Header = () => {
  • Settings
  • -
  • +
  • +
  • + +
  • +
    ); }; -export default Header; \ No newline at end of file +export default Header; diff --git a/src/frontend/src/components/common/Layout.jsx b/src/frontend/src/components/common/Layout.jsx index 3a65081..8b9cf96 100644 --- a/src/frontend/src/components/common/Layout.jsx +++ b/src/frontend/src/components/common/Layout.jsx @@ -1,13 +1,17 @@ import Header from './Header'; import {Outlet} from 'react-router-dom'; +import Menu from "./Menu.jsx"; const Layout = () => { return (
    -
    - -
    +
    + +
    + +
    +
    ); }; diff --git a/src/frontend/src/components/common/Menu.jsx b/src/frontend/src/components/common/Menu.jsx new file mode 100644 index 0000000..fa45f33 --- /dev/null +++ b/src/frontend/src/components/common/Menu.jsx @@ -0,0 +1,16 @@ +import {FaBuffer, FaPlus} from "react-icons/fa6"; + +const Menu = () => { + return ( +
    + +
    + ) +} + +export default Menu; \ No newline at end of file diff --git a/src/frontend/src/components/common/ThemeSwither.jsx b/src/frontend/src/components/common/ThemeSwither.jsx index 2e416a8..a5e4293 100644 --- a/src/frontend/src/components/common/ThemeSwither.jsx +++ b/src/frontend/src/components/common/ThemeSwither.jsx @@ -1,103 +1,29 @@ -import React from "react"; +import {useEffect, useState} from "react"; +import {MoonIcon, SunIcon} from "../../assets/icons.jsx"; -const themes = [ - "light", "dark", "cupcake" -]; +function ThemeSwitcher() { + // State to store current theme + const [theme, setTheme] = useState("light"); + + // When theme changes, apply it to + useEffect(() => { + document.querySelector("html").setAttribute("data-theme", theme); + }, [theme]); + + // Toggle between light and dark + const toggleTheme = () => { + setTheme(theme === "light" ? "dark" : "light"); + }; -export default function ThemeSwitcher() { return ( -
    -
    -
    -
    -
    -
    -
    -
    - - - -
    + - - ))} - -
  • - - - - - - - - - - - - - -
    Créer votre thème!
    -
    -
  • - -
    -
    + ); } + +export default ThemeSwitcher; diff --git a/src/worker/Dockerfile b/src/worker/Dockerfile index fe7d573..8e45379 100644 --- a/src/worker/Dockerfile +++ b/src/worker/Dockerfile @@ -7,6 +7,9 @@ WORKDIR /app RUN apt-get update && apt-get install -y --no-install-recommends \ libmagic1 \ file \ + pandoc \ + ghostscript \ + texlive-xetex \ && rm -rf /var/lib/apt/lists/* # Copy requirements and install dependencies diff --git a/src/worker/requirements.txt b/src/worker/requirements.txt index b44281a..6ad69ce 100644 --- a/src/worker/requirements.txt +++ b/src/worker/requirements.txt @@ -5,9 +5,14 @@ email-validator==2.3.0 fastapi==0.116.1 httptools==0.6.4 motor==3.7.1 -pymongo==4.15.0 +pillow==11.3.0 pydantic==2.11.9 +PyJWT==2.10.1 +pymongo==4.15.0 +pypandoc==1.15 +python-multipart==0.0.20 redis==6.4.0 +reportlab==4.4.4 uvicorn==0.35.0 python-magic==0.4.27 watchdog==6.0.0 \ No newline at end of file diff --git a/src/worker/tasks/common/__init__.py b/src/worker/tasks/common/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/worker/tasks/common/converter_utils.py b/src/worker/tasks/common/converter_utils.py new file mode 100644 index 0000000..27251dd --- /dev/null +++ b/src/worker/tasks/common/converter_utils.py @@ -0,0 +1,102 @@ +import subprocess +import uuid +from pathlib import Path + +import magic # python-magic + +from tasks.common.pdf_converter import TextToPdfConverter, ImageToPdfConverter, WordToPdfConverter + + +class UnsupportedFileTypeError(Exception): + """Exception raised when a file type is not supported.""" + pass + + +def generate_uuid_filename() -> str: + """Generate a unique filename using UUID4.""" + return str(uuid.uuid4()) + + +def detect_file_type(file_path: str) -> str: + """ + Detect the type of file using python-magic. + + Returns: + 'text', 'image', 'word' + + Raises: + UnsupportedFileTypeError: If file type is not supported. + """ + mime = magic.from_file(file_path, mime=True) + if mime.startswith("text/"): + return "text" + elif mime.startswith("image/"): + return "image" + elif mime in ("application/vnd.openxmlformats-officedocument.wordprocessingml.document",): + return "word" + else: + raise UnsupportedFileTypeError(f"Unsupported file type: {mime}") + + +def compress_pdf(input_pdf: str, output_pdf: str, quality: str = "ebook") -> None: + """ + Compress a PDF using Ghostscript. + + Args: + input_pdf (str): Path to the input PDF. + output_pdf (str): Path to save the compressed PDF. + quality (str): Ghostscript PDFSETTINGS option: screen, ebook, printer, prepress. + + Raises: + FileNotFoundError: If input PDF does not exist. + RuntimeError: If Ghostscript returns an error. + """ + input_path = Path(input_pdf) + output_path = Path(output_pdf) + + if not input_path.exists(): + raise FileNotFoundError(f"Input PDF not found: {input_pdf}") + + cmd = [ + "gs", + "-sDEVICE=pdfwrite", + "-dCompatibilityLevel=1.4", + f"-dPDFSETTINGS=/{quality}", + "-dNOPAUSE", + "-dQUIET", + "-dBATCH", + f"-sOutputFile={str(output_path)}", + str(input_path), + ] + + result = subprocess.run(cmd) + if result.returncode != 0: + raise RuntimeError(f"Ghostscript failed with return code {result.returncode}") + + +def convert_to_pdf(filepath: str, output_dir: str = ".") -> str: + """ + Convert any supported file to PDF. + + Args: + filepath (str): Path to the input file. + output_dir (str): Directory to save the output PDF. + + Returns: + str: Path to the generated PDF. + + Raises: + UnsupportedFileTypeError: If the input file type is not supported. + """ + file_type = detect_file_type(filepath) + + if file_type == "text": + converter = TextToPdfConverter(filepath, output_dir=output_dir) + elif file_type == "image": + converter = ImageToPdfConverter(filepath, output_dir=output_dir) + elif file_type == "word": + converter = WordToPdfConverter(filepath, output_dir=output_dir) + else: + raise ValueError(f"Unsupported file type: {file_type}") + + return converter.convert() diff --git a/src/worker/tasks/common/pdf_converter.py b/src/worker/tasks/common/pdf_converter.py new file mode 100644 index 0000000..92de243 --- /dev/null +++ b/src/worker/tasks/common/pdf_converter.py @@ -0,0 +1,83 @@ +from abc import ABC, abstractmethod +from pathlib import Path + +import pypandoc +from PIL import Image +from reportlab.lib.pagesizes import A4 +from reportlab.pdfgen import canvas + +from tasks.common.converter_utils import generate_uuid_filename + + +class BaseConverter(ABC): + """Abstract base class for file converters to PDF.""" + + def __init__(self, input_path: str, output_dir: str = ".") -> None: + self.input_path = Path(input_path) + self.output_dir = Path(output_dir) + self.output_path = self.output_dir / f"{generate_uuid_filename()}.pdf" + + @abstractmethod + def convert(self) -> str: + """Convert input file to PDF and return the output path.""" + pass + + +class TextToPdfConverter(BaseConverter): + """Converter for text files to PDF.""" + + def convert(self) -> str: + c = canvas.Canvas(str(self.output_path), pagesize=A4) + width, height = A4 + with open(self.input_path, "r", encoding="utf-8") as f: + y = height - 50 + for line in f: + c.drawString(50, y, line.strip()) + y -= 15 + if y < 50: + c.showPage() + y = height - 50 + c.save() + return str(self.output_path) + + +class ImageToPdfConverter(BaseConverter): + """Converter for image files to PDF.""" + + def convert(self) -> str: + image = Image.open(self.input_path) + rgb_image = image.convert("RGB") + rgb_image.save(self.output_path) + return str(self.output_path) + + +class WordToPdfConverter(BaseConverter): + """Converter for Word files (.docx) to PDF using pypandoc.""" + + def convert(self) -> str: + pypandoc.convert_file( + str(self.input_path), "pdf", outputfile=str(self.output_path) + ) + return str(self.output_path) + + +# Placeholders for future extensions +class HtmlToPdfConverter(BaseConverter): + """Placeholder for HTML to PDF converter.""" + + def convert(self) -> str: + raise NotImplementedError("HTML to PDF conversion not implemented.") + + +class ExcelToPdfConverter(BaseConverter): + """Placeholder for Excel to PDF converter.""" + + def convert(self) -> str: + raise NotImplementedError("Excel to PDF conversion not implemented.") + + +class MarkdownToPdfConverter(BaseConverter): + """Placeholder for Markdown to PDF converter.""" + + def convert(self) -> str: + raise NotImplementedError("Markdown to PDF conversion not implemented.") diff --git a/src/worker/tasks/document_processing.py b/src/worker/tasks/document_processing.py index f71c3e3..227024e 100644 --- a/src/worker/tasks/document_processing.py +++ b/src/worker/tasks/document_processing.py @@ -11,10 +11,20 @@ from typing import Any, Dict from app.config import settings from app.database.connection import get_database from app.services.document_service import DocumentService +from app.services.job_service import JobService +from tasks.common.converter_utils import convert_to_pdf from tasks.main import celery_app logger = logging.getLogger(__name__) + +def get_services(): + database = get_database() + document_service = DocumentService(database=database, objects_folder=settings.get_objects_folder()) + job_service = JobService(database=database) + return document_service, job_service + + @celery_app.task(bind=True, autoretry_for=(Exception,), retry_kwargs={'max_retries': 3, 'countdown': 60}) def process_document(self, filepath: str) -> Dict[str, Any]: """ @@ -38,10 +48,8 @@ def process_document(self, filepath: str) -> Dict[str, Any]: task_id = self.request.id logger.info(f"Starting document processing task {task_id} for file: {filepath}") - database = get_database() - document_service = DocumentService(database=database, objects_folder=settings.get_objects_folder()) - from app.services.job_service import JobService - job_service = JobService(database=database) + # get services + document_service, job_service = get_services() job = None try: @@ -56,7 +64,10 @@ def process_document(self, filepath: str) -> Dict[str, Any]: job_service.mark_job_as_started(job_id=job.id) logger.info(f"Job {task_id} marked as PROCESSING") - # Step 4: Mark job as completed + # Step 4: Create the pdf version of the document + pdf_file_path = convert_to_pdf(filepath, settings.get_temp_folder()) + + # Step x: Mark job as completed job_service.mark_job_as_completed(job_id=job.id) logger.info(f"Job {task_id} marked as COMPLETED") @@ -82,4 +93,3 @@ def process_document(self, filepath: str) -> Dict[str, Any]: # Re-raise the exception to trigger Celery retry mechanism raise - diff --git a/src/worker/tasks/main.py b/src/worker/tasks/main.py index f76c202..a32e82a 100644 --- a/src/worker/tasks/main.py +++ b/src/worker/tasks/main.py @@ -3,13 +3,19 @@ Celery worker for MyDocManager document processing tasks. This module contains all Celery tasks for processing documents. """ +import logging import os from celery import Celery +from app.config import settings + + # Environment variables -REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/0") -MONGODB_URL = os.getenv("MONGODB_URL", "mongodb://localhost:27017") +REDIS_URL = settings.get_redis_url() +MONGODB_URL = settings.get_mongodb_url() + +logger = logging.getLogger(__name__) # Initialize Celery app celery_app = Celery( @@ -28,9 +34,15 @@ celery_app.conf.update( timezone="UTC", enable_utc=True, task_track_started=True, - task_time_limit=300, # 5 minutes - task_soft_time_limit=240, # 4 minutes + task_time_limit=300, # 5 minutes + task_soft_time_limit=240, # 4 minutes ) if __name__ == "__main__": + # initialize temp folder if needed + tmp_folder = settings.get_temp_folder() + if not os.path.exists(tmp_folder): + logger.info(f"Creating temporary folder: {tmp_folder}") + os.makedirs(tmp_folder) + celery_app.start() diff --git a/tests/common/__init__.py b/tests/common/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/common/test_pdf_converter.py b/tests/common/test_pdf_converter.py new file mode 100644 index 0000000..a5718fd --- /dev/null +++ b/tests/common/test_pdf_converter.py @@ -0,0 +1,55 @@ +import shutil +import tempfile +from pathlib import Path + +import pytest + +from tasks.common.pdf_converter import TextToPdfConverter, ImageToPdfConverter, WordToPdfConverter + + +@pytest.fixture +def temp_dir(): + """Create a temporary directory for output PDFs.""" + dir_path = tempfile.mkdtemp() + yield dir_path + shutil.rmtree(dir_path) + + +def test_i_can_convert_text_to_pdf(temp_dir): + input_txt = Path(temp_dir) / "test.txt" + input_txt.write_text("Hello World!\nThis is a test.") + + converter = TextToPdfConverter(str(input_txt), output_dir=temp_dir) + output_pdf = converter.convert() + + assert Path(output_pdf).exists() + assert output_pdf.endswith(".pdf") + + +def test_i_can_convert_image_to_pdf(temp_dir): + from PIL import Image + + input_img = Path(temp_dir) / "image.png" + image = Image.new("RGB", (100, 100), color="red") + image.save(input_img) + + converter = ImageToPdfConverter(str(input_img), output_dir=temp_dir) + output_pdf = converter.convert() + + assert Path(output_pdf).exists() + assert output_pdf.endswith(".pdf") + + +def test_i_can_convert_word_to_pdf(temp_dir): + import docx + + input_docx = Path(temp_dir) / "document.docx" + doc = docx.Document() + doc.add_paragraph("Hello Word!") + doc.save(input_docx) + + converter = WordToPdfConverter(str(input_docx), output_dir=temp_dir) + output_pdf = converter.convert() + + assert Path(output_pdf).exists() + assert output_pdf.endswith(".pdf") diff --git a/tests/common/test_utils.py b/tests/common/test_utils.py new file mode 100644 index 0000000..4f7347e --- /dev/null +++ b/tests/common/test_utils.py @@ -0,0 +1,52 @@ +import shutil +import tempfile +from pathlib import Path + +import pytest + +from tasks.common.converter_utils import detect_file_type, UnsupportedFileTypeError + + +@pytest.fixture +def temp_dir(): + """Create a temporary directory for output PDFs.""" + dir_path = tempfile.mkdtemp() + yield dir_path + shutil.rmtree(dir_path) + + +def test_i_can_detect_text_file(temp_dir): + txt_file = Path(temp_dir) / "sample.txt" + txt_file.write_text("Sample text content") + detected_type = detect_file_type(str(txt_file)) + assert detected_type == "text" + + +def test_i_can_detect_image_file(temp_dir): + from PIL import Image + + img_file = Path(temp_dir) / "sample.jpg" + image = Image.new("RGB", (50, 50), color="blue") + image.save(img_file) + + detected_type = detect_file_type(str(img_file)) + assert detected_type == "image" + + +def test_i_can_detect_word_file(temp_dir): + import docx + + docx_file = Path(temp_dir) / "sample.docx" + doc = docx.Document() + doc.add_paragraph("Sample content") + doc.save(docx_file) + + detected_type = detect_file_type(str(docx_file)) + assert detected_type == "word" + + +def test_i_cannot_detect_unsupported_file(temp_dir): + exe_file = Path(temp_dir) / "sample.exe" + exe_file.write_bytes(b'\x4D\x5A\x90\x00\x03\x00\x00\x00') + with pytest.raises(UnsupportedFileTypeError): + detect_file_type(str(exe_file))