diff --git a/src/frontend/src/components/auth/LoginForm.jsx b/src/frontend/src/components/auth/LoginForm.jsx
index 7614015..ada7875 100644
--- a/src/frontend/src/components/auth/LoginForm.jsx
+++ b/src/frontend/src/components/auth/LoginForm.jsx
@@ -96,7 +96,6 @@ function LoginForm() {
{/* Card Header */}
-
My Documents DocManager
Sign in to your account
diff --git a/src/frontend/src/components/common/Header.jsx b/src/frontend/src/components/common/Header.jsx
index 24b8b46..436c874 100644
--- a/src/frontend/src/components/common/Header.jsx
+++ b/src/frontend/src/components/common/Header.jsx
@@ -1,8 +1,10 @@
-import { useAuth } from '../../hooks/useAuth';
-import { useNavigate } from 'react-router-dom';
+import {useAuth} from '../../hooks/useAuth';
+import {useNavigate} from 'react-router-dom';
+import ThemeSwitcher from "./ThemeSwither.jsx";
+import React from "react";
const Header = () => {
- const { user, logout } = useAuth();
+ const {user, logout} = useAuth();
const navigate = useNavigate();
const handleLogout = async () => {
@@ -13,35 +15,8 @@ const Header = () => {
return (
-
-
-
@@ -59,12 +34,16 @@ const Header = () => {
Settings
-
+
+
+
+
+
);
};
-export default Header;
\ No newline at end of file
+export default Header;
diff --git a/src/frontend/src/components/common/Layout.jsx b/src/frontend/src/components/common/Layout.jsx
index 3a65081..8b9cf96 100644
--- a/src/frontend/src/components/common/Layout.jsx
+++ b/src/frontend/src/components/common/Layout.jsx
@@ -1,13 +1,17 @@
import Header from './Header';
import {Outlet} from 'react-router-dom';
+import Menu from "./Menu.jsx";
const Layout = () => {
return (
);
};
diff --git a/src/frontend/src/components/common/Menu.jsx b/src/frontend/src/components/common/Menu.jsx
new file mode 100644
index 0000000..fa45f33
--- /dev/null
+++ b/src/frontend/src/components/common/Menu.jsx
@@ -0,0 +1,16 @@
+import {FaBuffer, FaPlus} from "react-icons/fa6";
+
+const Menu = () => {
+ return (
+
+ )
+}
+
+export default Menu;
\ No newline at end of file
diff --git a/src/frontend/src/components/common/ThemeSwither.jsx b/src/frontend/src/components/common/ThemeSwither.jsx
index 2e416a8..a5e4293 100644
--- a/src/frontend/src/components/common/ThemeSwither.jsx
+++ b/src/frontend/src/components/common/ThemeSwither.jsx
@@ -1,103 +1,29 @@
-import React from "react";
+import {useEffect, useState} from "react";
+import {MoonIcon, SunIcon} from "../../assets/icons.jsx";
-const themes = [
- "light", "dark", "cupcake"
-];
+function ThemeSwitcher() {
+ // State to store current theme
+ const [theme, setTheme] = useState("light");
+
+ // When theme changes, apply it to the <html> element
+ useEffect(() => {
+ document.querySelector("html").setAttribute("data-theme", theme);
+ }, [theme]);
+
+ // Toggle between light and dark
+ const toggleTheme = () => {
+ setTheme(theme === "light" ? "dark" : "light");
+ };
-export default function ThemeSwitcher() {
return (
-
-
+
+
);
}
+
+export default ThemeSwitcher;
diff --git a/src/worker/Dockerfile b/src/worker/Dockerfile
index fe7d573..8e45379 100644
--- a/src/worker/Dockerfile
+++ b/src/worker/Dockerfile
@@ -7,6 +7,9 @@ WORKDIR /app
RUN apt-get update && apt-get install -y --no-install-recommends \
libmagic1 \
file \
+ pandoc \
+ ghostscript \
+ texlive-xetex \
&& rm -rf /var/lib/apt/lists/*
# Copy requirements and install dependencies
diff --git a/src/worker/requirements.txt b/src/worker/requirements.txt
index b44281a..6ad69ce 100644
--- a/src/worker/requirements.txt
+++ b/src/worker/requirements.txt
@@ -5,9 +5,14 @@ email-validator==2.3.0
fastapi==0.116.1
httptools==0.6.4
motor==3.7.1
-pymongo==4.15.0
+pillow==11.3.0
pydantic==2.11.9
+PyJWT==2.10.1
+pymongo==4.15.0
+pypandoc==1.15
+python-multipart==0.0.20
redis==6.4.0
+reportlab==4.4.4
uvicorn==0.35.0
python-magic==0.4.27
watchdog==6.0.0
\ No newline at end of file
diff --git a/src/worker/tasks/common/__init__.py b/src/worker/tasks/common/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/worker/tasks/common/converter_utils.py b/src/worker/tasks/common/converter_utils.py
new file mode 100644
index 0000000..27251dd
--- /dev/null
+++ b/src/worker/tasks/common/converter_utils.py
@@ -0,0 +1,102 @@
+import subprocess
+import uuid
+from pathlib import Path
+
+import magic # python-magic
+
+from tasks.common.pdf_converter import TextToPdfConverter, ImageToPdfConverter, WordToPdfConverter
+
+
+class UnsupportedFileTypeError(Exception):
+    """Signals that a file's type is not one of the supported kinds."""
+
+
+def generate_uuid_filename() -> str:
+    """Return a freshly generated random UUID (version 4) as a string."""
+    fresh_id = uuid.uuid4()
+    return str(fresh_id)
+
+
+def detect_file_type(file_path: str) -> str:
+    """
+    Classify a file by the MIME type python-magic reports for it.
+
+    Args:
+        file_path (str): Path of the file to inspect.
+
+    Returns:
+        str: 'text' for text/* types, 'image' for image/* types,
+        'word' for .docx (OpenXML word-processing) documents.
+
+    Raises:
+        UnsupportedFileTypeError: If the MIME type matches none of the above.
+    """
+    docx_mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
+    detected = magic.from_file(file_path, mime=True)
+
+    # MIME families are matched by prefix: text first, then image.
+    for prefix, label in (("text/", "text"), ("image/", "image")):
+        if detected.startswith(prefix):
+            return label
+
+    if detected == docx_mime:
+        return "word"
+
+    raise UnsupportedFileTypeError(f"Unsupported file type: {detected}")
+
+
+def compress_pdf(input_pdf: str, output_pdf: str, quality: str = "ebook") -> None:
+    """
+    Compress a PDF using Ghostscript.
+
+    Ghostscript's output is captured so that, on failure, its diagnostics are
+    included in the raised error instead of being lost.
+
+    Args:
+        input_pdf (str): Path to the input PDF.
+        output_pdf (str): Path to save the compressed PDF.
+        quality (str): Ghostscript PDFSETTINGS option: screen, ebook, printer, prepress.
+
+    Raises:
+        FileNotFoundError: If input PDF does not exist.
+        RuntimeError: If Ghostscript returns an error. The message carries the
+            return code followed by whatever Ghostscript printed, when available.
+    """
+    input_path = Path(input_pdf)
+    output_path = Path(output_pdf)
+
+    if not input_path.exists():
+        raise FileNotFoundError(f"Input PDF not found: {input_pdf}")
+
+    cmd = [
+        "gs",
+        "-sDEVICE=pdfwrite",
+        "-dCompatibilityLevel=1.4",
+        f"-dPDFSETTINGS=/{quality}",
+        "-dNOPAUSE",
+        "-dQUIET",
+        "-dBATCH",
+        f"-sOutputFile={str(output_path)}",
+        str(input_path),
+    ]
+
+    # errors="replace" keeps undecodable bytes in the tool's output from
+    # masking the real failure with a UnicodeDecodeError.
+    result = subprocess.run(cmd, capture_output=True, text=True, errors="replace")
+    if result.returncode != 0:
+        details = (result.stderr or result.stdout or "").strip()
+        message = f"Ghostscript failed with return code {result.returncode}"
+        if details:
+            message = f"{message}: {details}"
+        raise RuntimeError(message)
+
+
+def convert_to_pdf(filepath: str, output_dir: str = ".") -> str:
+    """
+    Convert any supported file to PDF.
+
+    The file type is determined with detect_file_type and the matching
+    converter class is instantiated and run.
+
+    Args:
+        filepath (str): Path to the input file.
+        output_dir (str): Directory to save the output PDF.
+
+    Returns:
+        str: Path to the generated PDF.
+
+    Raises:
+        UnsupportedFileTypeError: If the input file type is not supported.
+    """
+    converters = {
+        "text": TextToPdfConverter,
+        "image": ImageToPdfConverter,
+        "word": WordToPdfConverter,
+    }
+
+    file_type = detect_file_type(filepath)
+    converter_cls = converters.get(file_type)
+    if converter_cls is None:
+        # Only reached if detect_file_type returns a label with no converter
+        # registered above; raise the documented error type rather than ValueError.
+        raise UnsupportedFileTypeError(f"Unsupported file type: {file_type}")
+
+    return converter_cls(filepath, output_dir=output_dir).convert()
diff --git a/src/worker/tasks/common/pdf_converter.py b/src/worker/tasks/common/pdf_converter.py
new file mode 100644
index 0000000..92de243
--- /dev/null
+++ b/src/worker/tasks/common/pdf_converter.py
@@ -0,0 +1,83 @@
+from abc import ABC, abstractmethod
+from pathlib import Path
+
+import pypandoc
+from PIL import Image
+from reportlab.lib.pagesizes import A4
+from reportlab.pdfgen import canvas
+
+
+class BaseConverter(ABC):
+    """Abstract base class for file converters to PDF.
+
+    Attributes:
+        input_path (Path): File to convert.
+        output_dir (Path): Directory receiving the generated PDF.
+        output_path (Path): Target file, named "<uuid4>.pdf" inside output_dir.
+    """
+
+    def __init__(self, input_path: str, output_dir: str = ".") -> None:
+        """
+        Store the source path and reserve a unique output file name.
+
+        Args:
+            input_path (str): Path to the file to convert.
+            output_dir (str): Directory to save the output PDF; created if missing.
+        """
+        # Imported here rather than at module level: converter_utils imports this
+        # module while it is loading, so a module-level import in both directions
+        # fails with ImportError whichever of the two modules is imported first.
+        from tasks.common.converter_utils import generate_uuid_filename
+
+        self.input_path = Path(input_path)
+        self.output_dir = Path(output_dir)
+        # Make sure the target directory exists before any converter writes to it.
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+        self.output_path = self.output_dir / f"{generate_uuid_filename()}.pdf"
+
+    @abstractmethod
+    def convert(self) -> str:
+        """Convert input file to PDF and return the output path."""
+        pass
+
+
+class TextToPdfConverter(BaseConverter):
+    """Converter for text files to PDF."""
+
+    def convert(self) -> str:
+        """
+        Draw the input file line by line onto A4 pages.
+
+        Lines start 50 points from the left edge and 50 points below the top of
+        the page, 15 points apart; a new page is started once the cursor drops
+        below 50 points from the bottom.
+
+        Returns:
+            str: Path to the generated PDF.
+        """
+        left_margin = 50
+        vertical_margin = 50
+        line_height = 15
+        _, page_height = A4
+
+        c = canvas.Canvas(str(self.output_path), pagesize=A4)
+        with open(self.input_path, "r", encoding="utf-8") as f:
+            y = page_height - vertical_margin
+            for line in f:
+                # Only trailing whitespace (including the newline) is removed so
+                # that the source indentation is kept; tabs are expanded to
+                # spaces so indentation is drawn as plain spacing.
+                c.drawString(left_margin, y, line.rstrip().expandtabs(4))
+                y -= line_height
+                if y < vertical_margin:
+                    c.showPage()
+                    y = page_height - vertical_margin
+        c.save()
+        return str(self.output_path)
+
+
+class ImageToPdfConverter(BaseConverter):
+    """Converter for image files to PDF."""
+
+    def convert(self) -> str:
+        """
+        Re-encode the input image as RGB and save it to the output path.
+
+        Returns:
+            str: Path to the generated PDF.
+        """
+        # The context manager closes the image (and its underlying file) even
+        # when conversion or saving raises.
+        with Image.open(self.input_path) as image:
+            rgb_image = image.convert("RGB")
+            rgb_image.save(self.output_path)
+        return str(self.output_path)
+
+
+class WordToPdfConverter(BaseConverter):
+    """Turns a Word (.docx) document into a PDF through pypandoc."""
+
+    def convert(self) -> str:
+        """Run pypandoc on the input file and return the path of the PDF it wrote."""
+        source = str(self.input_path)
+        target = str(self.output_path)
+        pypandoc.convert_file(source, "pdf", outputfile=target)
+        return target
+
+
+# Placeholders for future extensions
+class HtmlToPdfConverter(BaseConverter):
+    """Stub reserved for an HTML to PDF converter."""
+
+    def convert(self) -> str:
+        """Always raises NotImplementedError."""
+        message = "HTML to PDF conversion not implemented."
+        raise NotImplementedError(message)
+
+
+class ExcelToPdfConverter(BaseConverter):
+    """Stub reserved for an Excel to PDF converter."""
+
+    def convert(self) -> str:
+        """Always raises NotImplementedError."""
+        message = "Excel to PDF conversion not implemented."
+        raise NotImplementedError(message)
+
+
+class MarkdownToPdfConverter(BaseConverter):
+    """Stub reserved for a Markdown to PDF converter."""
+
+    def convert(self) -> str:
+        """Always raises NotImplementedError."""
+        message = "Markdown to PDF conversion not implemented."
+        raise NotImplementedError(message)
diff --git a/src/worker/tasks/document_processing.py b/src/worker/tasks/document_processing.py
index f71c3e3..227024e 100644
--- a/src/worker/tasks/document_processing.py
+++ b/src/worker/tasks/document_processing.py
@@ -11,10 +11,20 @@ from typing import Any, Dict
from app.config import settings
from app.database.connection import get_database
from app.services.document_service import DocumentService
+from app.services.job_service import JobService
+from tasks.common.converter_utils import convert_to_pdf
from tasks.main import celery_app
logger = logging.getLogger(__name__)
+
+def get_services():
+    """Build the document and job services on top of one database handle.
+
+    Returns:
+        tuple: (document_service, job_service), both constructed with the same
+        object returned by get_database().
+    """
+    db = get_database()
+    objects_folder = settings.get_objects_folder()
+    return (
+        DocumentService(database=db, objects_folder=objects_folder),
+        JobService(database=db),
+    )
+
+
@celery_app.task(bind=True, autoretry_for=(Exception,), retry_kwargs={'max_retries': 3, 'countdown': 60})
def process_document(self, filepath: str) -> Dict[str, Any]:
"""
@@ -38,10 +48,8 @@ def process_document(self, filepath: str) -> Dict[str, Any]:
task_id = self.request.id
logger.info(f"Starting document processing task {task_id} for file: {filepath}")
- database = get_database()
- document_service = DocumentService(database=database, objects_folder=settings.get_objects_folder())
- from app.services.job_service import JobService
- job_service = JobService(database=database)
+ # get services
+ document_service, job_service = get_services()
job = None
try:
@@ -56,7 +64,10 @@ def process_document(self, filepath: str) -> Dict[str, Any]:
job_service.mark_job_as_started(job_id=job.id)
logger.info(f"Job {task_id} marked as PROCESSING")
- # Step 4: Mark job as completed
+ # Step 4: Create the pdf version of the document
+ pdf_file_path = convert_to_pdf(filepath, settings.get_temp_folder())
+
+ # Step 5: Mark job as completed
job_service.mark_job_as_completed(job_id=job.id)
logger.info(f"Job {task_id} marked as COMPLETED")
@@ -82,4 +93,3 @@ def process_document(self, filepath: str) -> Dict[str, Any]:
# Re-raise the exception to trigger Celery retry mechanism
raise
-
diff --git a/src/worker/tasks/main.py b/src/worker/tasks/main.py
index f76c202..a32e82a 100644
--- a/src/worker/tasks/main.py
+++ b/src/worker/tasks/main.py
@@ -3,13 +3,19 @@ Celery worker for MyDocManager document processing tasks.
This module contains all Celery tasks for processing documents.
"""
+import logging
import os
from celery import Celery
+from app.config import settings
+
+
# Environment variables
-REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/0")
-MONGODB_URL = os.getenv("MONGODB_URL", "mongodb://localhost:27017")
+REDIS_URL = settings.get_redis_url()
+MONGODB_URL = settings.get_mongodb_url()
+
+logger = logging.getLogger(__name__)
# Initialize Celery app
celery_app = Celery(
@@ -28,9 +34,15 @@ celery_app.conf.update(
timezone="UTC",
enable_utc=True,
task_track_started=True,
- task_time_limit=300, # 5 minutes
- task_soft_time_limit=240, # 4 minutes
+ task_time_limit=300, # 5 minutes
+ task_soft_time_limit=240, # 4 minutes
)
if __name__ == "__main__":
+    # initialize temp folder if needed
+    tmp_folder = settings.get_temp_folder()
+    if not os.path.exists(tmp_folder):
+        logger.info(f"Creating temporary folder: {tmp_folder}")
+    # exist_ok=True: another process may create the folder between the check
+    # above and this call; that must not abort start-up with FileExistsError.
+    os.makedirs(tmp_folder, exist_ok=True)
+
celery_app.start()
diff --git a/tests/common/__init__.py b/tests/common/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/common/test_pdf_converter.py b/tests/common/test_pdf_converter.py
new file mode 100644
index 0000000..a5718fd
--- /dev/null
+++ b/tests/common/test_pdf_converter.py
@@ -0,0 +1,55 @@
+import shutil
+import tempfile
+from pathlib import Path
+
+import pytest
+
+from tasks.common.pdf_converter import TextToPdfConverter, ImageToPdfConverter, WordToPdfConverter
+
+
+@pytest.fixture
+def temp_dir():
+    """Hand the test a scratch directory path and delete the directory afterwards."""
+    scratch = tempfile.mkdtemp()
+    yield scratch
+    shutil.rmtree(scratch)
+
+
+def test_i_can_convert_text_to_pdf(temp_dir):
+    """A two-line text file is converted and the returned .pdf path exists."""
+    source = Path(temp_dir) / "test.txt"
+    source.write_text("Hello World!\nThis is a test.")
+
+    result = TextToPdfConverter(str(source), output_dir=temp_dir).convert()
+
+    produced = Path(result)
+    assert produced.exists()
+    assert result.endswith(".pdf")
+
+
+def test_i_can_convert_image_to_pdf(temp_dir):
+    """A generated 100x100 red PNG is converted and the returned .pdf path exists."""
+    from PIL import Image
+
+    source = Path(temp_dir) / "image.png"
+    Image.new("RGB", (100, 100), color="red").save(source)
+
+    result = ImageToPdfConverter(str(source), output_dir=temp_dir).convert()
+
+    produced = Path(result)
+    assert produced.exists()
+    assert result.endswith(".pdf")
+
+
+def test_i_can_convert_word_to_pdf(temp_dir):
+    """A one-paragraph .docx is converted and the returned .pdf path exists."""
+    import docx
+
+    source = Path(temp_dir) / "document.docx"
+    word_doc = docx.Document()
+    word_doc.add_paragraph("Hello Word!")
+    word_doc.save(source)
+
+    result = WordToPdfConverter(str(source), output_dir=temp_dir).convert()
+
+    produced = Path(result)
+    assert produced.exists()
+    assert result.endswith(".pdf")
diff --git a/tests/common/test_utils.py b/tests/common/test_utils.py
new file mode 100644
index 0000000..4f7347e
--- /dev/null
+++ b/tests/common/test_utils.py
@@ -0,0 +1,52 @@
+import shutil
+import tempfile
+from pathlib import Path
+
+import pytest
+
+from tasks.common.converter_utils import detect_file_type, UnsupportedFileTypeError
+
+
+@pytest.fixture
+def temp_dir():
+    """Hand the test a scratch directory path and delete the directory afterwards."""
+    scratch = tempfile.mkdtemp()
+    yield scratch
+    shutil.rmtree(scratch)
+
+
+def test_i_can_detect_text_file(temp_dir):
+    """A plain text file is reported as 'text'."""
+    sample = Path(temp_dir) / "sample.txt"
+    sample.write_text("Sample text content")
+
+    assert detect_file_type(str(sample)) == "text"
+
+
+def test_i_can_detect_image_file(temp_dir):
+    """A generated 50x50 blue JPEG is reported as 'image'."""
+    from PIL import Image
+
+    sample = Path(temp_dir) / "sample.jpg"
+    Image.new("RGB", (50, 50), color="blue").save(sample)
+
+    assert detect_file_type(str(sample)) == "image"
+
+
+def test_i_can_detect_word_file(temp_dir):
+    """A one-paragraph .docx document is reported as 'word'."""
+    import docx
+
+    sample = Path(temp_dir) / "sample.docx"
+    word_doc = docx.Document()
+    word_doc.add_paragraph("Sample content")
+    word_doc.save(sample)
+
+    assert detect_file_type(str(sample)) == "word"
+
+
+def test_i_cannot_detect_unsupported_file(temp_dir):
+    """A file starting with the bytes 'MZ' is rejected with UnsupportedFileTypeError."""
+    sample = Path(temp_dir) / "sample.exe"
+    sample.write_bytes(b'\x4D\x5A\x90\x00\x03\x00\x00\x00')
+
+    sample_path = str(sample)
+    with pytest.raises(UnsupportedFileTypeError):
+        detect_file_type(sample_path)