Added Login + Working on pdf creation

This commit is contained in:
2025-09-29 23:04:48 +02:00
parent 56dec3a619
commit 78181e71be
25 changed files with 481 additions and 176 deletions

View File

@@ -7,6 +7,7 @@ billiard==4.2.1
celery==5.5.3 celery==5.5.3
certifi==2025.8.3 certifi==2025.8.3
cffi==2.0.0 cffi==2.0.0
charset-normalizer==3.4.3
click==8.2.1 click==8.2.1
click-didyoumean==0.3.1 click-didyoumean==0.3.1
click-plugins==1.1.1.2 click-plugins==1.1.1.2
@@ -26,10 +27,12 @@ importlib_metadata==8.7.0
iniconfig==2.1.0 iniconfig==2.1.0
izulu==0.50.0 izulu==0.50.0
kombu==5.5.4 kombu==5.5.4
lxml==6.0.2
mongomock==4.3.0 mongomock==4.3.0
mongomock-motor==0.0.36 mongomock-motor==0.0.36
motor==3.7.1 motor==3.7.1
packaging==25.0 packaging==25.0
pillow==11.3.0
pipdeptree==2.28.0 pipdeptree==2.28.0
pluggy==1.6.0 pluggy==1.6.0
prompt_toolkit==3.0.52 prompt_toolkit==3.0.52
@@ -41,16 +44,19 @@ pydantic_core==2.33.2
Pygments==2.19.2 Pygments==2.19.2
PyJWT==2.10.1 PyJWT==2.10.1
pymongo==4.15.1 pymongo==4.15.1
pypandoc==1.15
pytest==8.4.2 pytest==8.4.2
pytest-asyncio==1.2.0 pytest-asyncio==1.2.0
pytest-mock==3.15.1 pytest-mock==3.15.1
python-dateutil==2.9.0.post0 python-dateutil==2.9.0.post0
python-docx==1.2.0
python-dotenv==1.1.1 python-dotenv==1.1.1
python-magic==0.4.27 python-magic==0.4.27
python-multipart==0.0.20 python-multipart==0.0.20
pytz==2025.2 pytz==2025.2
PyYAML==6.0.2 PyYAML==6.0.2
redis==6.4.0 redis==6.4.0
reportlab==4.4.4
rsa==4.9.1 rsa==4.9.1
sentinels==1.1.1 sentinels==1.1.1
six==1.17.0 six==1.17.0

View File

@@ -7,6 +7,9 @@ WORKDIR /app
RUN apt-get update && apt-get install -y --no-install-recommends \ RUN apt-get update && apt-get install -y --no-install-recommends \
libmagic1 \ libmagic1 \
file \ file \
pandoc \
ghostscript \
texlive-xetex \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
# Copy requirements and install dependencies # Copy requirements and install dependencies

View File

@@ -34,22 +34,6 @@ def get_redis_url() -> str:
return os.getenv("REDIS_URL", "redis://localhost:6379/0") return os.getenv("REDIS_URL", "redis://localhost:6379/0")
# def get_redis_host() -> str:
# redis_url = get_redis_url()
# if redis_url.startswith("redis://"):
# return redis_url.split("redis://")[1].split("/")[0]
# else:
# return redis_url
#
#
# def get_redis_port() -> int:
# redis_url = get_redis_url()
# if redis_url.startswith("redis://"):
# return int(redis_url.split("redis://")[1].split("/")[0].split(":")[1])
# else:
# return int(redis_url.split(":")[1])
def get_jwt_secret_key() -> str: def get_jwt_secret_key() -> str:
""" """
Get JWT secret key from environment variables. Get JWT secret key from environment variables.
@@ -114,6 +98,11 @@ def get_objects_folder() -> str:
return os.getenv("OBJECTS_FOLDER", "/objects") return os.getenv("OBJECTS_FOLDER", "/objects")
def watch_directory() -> str: def get_watch_folder() -> str:
"""Directory to monitor for new files""" """Directory to monitor for new files"""
return os.getenv("WATCH_DIRECTORY", "/watched_files") return os.getenv("WATCH_DIRECTORY", "/watched_files")
def get_temp_folder() -> str:
    """Return the directory used to store temporary files.

    The value is read from the ``TEMP_DIRECTORY`` environment variable and
    falls back to ``/temp`` when the variable is not set.
    """
    return os.environ.get("TEMP_DIRECTORY", "/temp")

View File

@@ -63,17 +63,15 @@ class DocumentFileEventHandler(FileSystemEventHandler):
logger.info(f"Processing new file: {filepath}") logger.info(f"Processing new file: {filepath}")
# try: try:
from tasks.document_processing import process_document from tasks.document_processing import process_document
task_result = process_document.delay(filepath) task_result = process_document.delay(filepath)
print(task_result) task_id = task_result.task_id
print("hello world") logger.info(f"Dispatched Celery task with ID: {task_id}")
# task_id = task_result.task_id
# logger.info(f"Dispatched Celery task with ID: {task_id}")
# except Exception as e: except Exception as e:
# logger.error(f"Failed to process file {filepath}: {str(e)}") logger.error(f"Failed to process file {filepath}: {str(e)}")
# # Note: We don't re-raise the exception to keep the watcher running # Note: We don't re-raise the exception to keep the watcher running
class FileWatcher: class FileWatcher:

View File

@@ -65,12 +65,12 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
# Create and start file watcher # Create and start file watcher
file_watcher = create_file_watcher( file_watcher = create_file_watcher(
watch_directory=settings.watch_directory(), watch_directory=settings.get_watch_folder(),
document_service=document_service, document_service=document_service,
job_service=job_service job_service=job_service
) )
file_watcher.start() file_watcher.start()
logger.info(f"FileWatcher started for directory: {settings.watch_directory()}") logger.info(f"FileWatcher started for directory: {settings.get_watch_folder()}")
logger.info("Application startup completed successfully") logger.info("Application startup completed successfully")
@@ -102,7 +102,7 @@ app = FastAPI(
# Configure CORS # Configure CORS
app.add_middleware( app.add_middleware(
CORSMiddleware, CORSMiddleware,
allow_origins=["http://localhost:5173"], # React frontend allow_origins=["http://localhost:5173", "http://localhost:5174"], # React frontend
allow_credentials=True, allow_credentials=True,
allow_methods=["*"], allow_methods=["*"],
allow_headers=["*"], allow_headers=["*"],

View File

@@ -5,11 +5,14 @@ email-validator==2.3.0
fastapi==0.116.1 fastapi==0.116.1
httptools==0.6.4 httptools==0.6.4
motor==3.7.1 motor==3.7.1
pillow==11.3.0
pydantic==2.11.9 pydantic==2.11.9
PyJWT==2.10.1 PyJWT==2.10.1
pymongo==4.15.0 pymongo==4.15.0
pypandoc==1.15
python-multipart==0.0.20 python-multipart==0.0.20
redis==6.4.0 redis==6.4.0
reportlab==4.4.4
uvicorn==0.35.0 uvicorn==0.35.0
python-magic==0.4.27 python-magic==0.4.27
watchdog==6.0.0 watchdog==6.0.0

View File

@@ -12,6 +12,7 @@
"axios": "^1.12.2", "axios": "^1.12.2",
"react": "^19.1.1", "react": "^19.1.1",
"react-dom": "^19.1.1", "react-dom": "^19.1.1",
"react-icons": "^5.5.0",
"react-router-dom": "^7.9.3" "react-router-dom": "^7.9.3"
}, },
"devDependencies": { "devDependencies": {
@@ -3377,6 +3378,15 @@
"react": "^19.1.1" "react": "^19.1.1"
} }
}, },
"node_modules/react-icons": {
"version": "5.5.0",
"resolved": "https://registry.npmjs.org/react-icons/-/react-icons-5.5.0.tgz",
"integrity": "sha512-MEFcXdkP3dLo8uumGI5xN3lDFNsRtrjbOEKDLD7yv76v4wpnEq2Lt2qeHaQOr34I/wPN3s3+N08WkQ+CW37Xiw==",
"license": "MIT",
"peerDependencies": {
"react": "*"
}
},
"node_modules/react-refresh": { "node_modules/react-refresh": {
"version": "0.17.0", "version": "0.17.0",
"resolved": "https://registry.npmjs.org/react-refresh/-/react-refresh-0.17.0.tgz", "resolved": "https://registry.npmjs.org/react-refresh/-/react-refresh-0.17.0.tgz",

View File

@@ -14,6 +14,7 @@
"axios": "^1.12.2", "axios": "^1.12.2",
"react": "^19.1.1", "react": "^19.1.1",
"react-dom": "^19.1.1", "react-dom": "^19.1.1",
"react-icons": "^5.5.0",
"react-router-dom": "^7.9.3" "react-router-dom": "^7.9.3"
}, },
"devDependencies": { "devDependencies": {

View File

@@ -0,0 +1,35 @@
// src/assets/icons.jsx
/**
 * Sun icon as a ready-made JSX element (not a component): render it with
 * `{SunIcon}`, not `<SunIcon/>`.
 * Outline-style SVG on a 24x24 viewBox; the stroke uses `currentColor`, so the
 * icon takes the text color of its parent.
 */
export const SunIcon = (
<svg
xmlns="http://www.w3.org/2000/svg"
className="h-6 w-6"
fill="none"
viewBox="0 0 24 24"
stroke="currentColor"
>
<path
strokeLinecap="round"
strokeLinejoin="round"
strokeWidth="2"
d="M12 3v1m0 16v1m8.66-9h-1M4.34 12h-1m15.36 6.36l-.7-.7M6.34 6.34l-.7-.7m12.02 12.02l-.7-.7M6.34 17.66l-.7-.7M16 12a4 4 0 11-8 0 4 4 0 018 0z"
/>
</svg>
);
/**
 * Moon icon as a ready-made JSX element (not a component): render it with
 * `{MoonIcon}`, not `<MoonIcon/>`.
 * Outline-style SVG on a 24x24 viewBox; the stroke uses `currentColor`, so the
 * icon takes the text color of its parent.
 */
export const MoonIcon = (
<svg
xmlns="http://www.w3.org/2000/svg"
className="h-6 w-6"
fill="none"
viewBox="0 0 24 24"
stroke="currentColor"
>
<path
strokeLinecap="round"
strokeLinejoin="round"
strokeWidth="2"
d="M21 12.79A9 9 0 1111.21 3a7 7 0 0010.79 9.79z"
/>
</svg>
);

View File

@@ -8,13 +8,27 @@ import ThemeSwitcher from "../common/ThemeSwither.jsx";
* @param {Object} props - Component props * @param {Object} props - Component props
* @param {React.ReactNode} props.children - Child components to render * @param {React.ReactNode} props.children - Child components to render
*/ */
const AuthHeader = () => {
return (
<div className="navbar bg-base-100 shadow-lg">
<div className="navbar-start">
<h1 className="text-xl font-bold">MyDocManager</h1>
</div>
<div className="navbar-end">
<ThemeSwitcher/>
</div>
</div>
)
}
function AuthLayout({children}) { function AuthLayout({children}) {
return ( return (
<div className="min-h-screen bg-gradient-to-br from-primary/10 via-base-200 to-secondary/10"> <div className="min-h-screen bg-gradient-to-br from-primary/10 via-base-200 to-secondary/10">
<AuthHeader/>
{/* Main container with flex centering */} {/* Main container with flex centering */}
<div className="min-h-screen flex items-center justify-center p-4"> <div className="min-h-screen flex items-center justify-center p-4">
{/* Content wrapper for responsive spacing */} {/* Content wrapper for responsive spacing */}
<ThemeSwitcher />
<div> <div>
{children} {children}
</div> </div>

View File

@@ -96,7 +96,6 @@ function LoginForm() {
<div className="card-body"> <div className="card-body">
{/* Card Header */} {/* Card Header */}
<div className="text-center mb-6"> <div className="text-center mb-6">
<h1 className="text-3xl font-bold text-primary">My Documents DocManager</h1>
<p className="text-base-content/70 mt-2">Sign in to your account</p> <p className="text-base-content/70 mt-2">Sign in to your account</p>
</div> </div>

View File

@@ -1,8 +1,10 @@
import { useAuth } from '../../hooks/useAuth'; import {useAuth} from '../../hooks/useAuth';
import { useNavigate } from 'react-router-dom'; import {useNavigate} from 'react-router-dom';
import ThemeSwitcher from "./ThemeSwither.jsx";
import React from "react";
const Header = () => { const Header = () => {
const { user, logout } = useAuth(); const {user, logout} = useAuth();
const navigate = useNavigate(); const navigate = useNavigate();
const handleLogout = async () => { const handleLogout = async () => {
@@ -13,35 +15,8 @@ const Header = () => {
return ( return (
<div className="navbar bg-base-100 shadow-lg"> <div className="navbar bg-base-100 shadow-lg">
<div className="navbar-start"> <div className="navbar-start">
<div className="dropdown"> <h1 className="text-xl font-bold">MyDocManager</h1>
<div tabIndex={0} role="button" className="btn btn-ghost lg:hidden">
<svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth="2" d="M4 6h16M4 12h8m-8 6h16" />
</svg>
</div>
<ul tabIndex={0} className="menu menu-sm dropdown-content bg-base-100 rounded-box z-[1] mt-3 w-52 p-2 shadow">
<li><a href="/dashboard">Dashboard</a></li>
<li><a href="/documents">Documents</a></li>
{user?.role === 'admin' && (
<li><a href="/users">User Management</a></li>
)}
</ul>
</div>
<a className="btn btn-ghost text-xl" href="/dashboard">
MyDocManager
</a>
</div> </div>
<div className="navbar-center hidden lg:flex">
<ul className="menu menu-horizontal px-1">
<li><a href="/dashboard">Dashboard</a></li>
<li><a href="/documents">Documents</a></li>
{user?.role === 'admin' && (
<li><a href="/users">User Management</a></li>
)}
</ul>
</div>
<div className="navbar-end"> <div className="navbar-end">
<div className="dropdown dropdown-end"> <div className="dropdown dropdown-end">
<div tabIndex={0} role="button" className="btn btn-ghost btn-circle avatar"> <div tabIndex={0} role="button" className="btn btn-ghost btn-circle avatar">
@@ -59,9 +34,13 @@ const Header = () => {
</div> </div>
</li> </li>
<li><a>Settings</a></li> <li><a>Settings</a></li>
<li><button onClick={handleLogout}>Logout</button></li> <li><ThemeSwitcher/></li>
<li>
<button onClick={handleLogout}>Logout</button>
</li>
</ul> </ul>
</div> </div>
</div> </div>
</div> </div>
); );

View File

@@ -1,13 +1,17 @@
import Header from './Header'; import Header from './Header';
import {Outlet} from 'react-router-dom'; import {Outlet} from 'react-router-dom';
import Menu from "./Menu.jsx";
const Layout = () => { const Layout = () => {
return ( return (
<div className="min-h-screen bg-base-200"> <div className="min-h-screen bg-base-200">
<Header/> <Header/>
<main className="container mx-auto px-4 py-8"> <div className="flex">
<Outlet/> <aside className="w-64 min-h-screen bg-base-100 shadow-lg"><Menu/></aside>
</main> <main className="flex-1 container mx-auto px-4 py-8">
<Outlet/>
</main>
</div>
</div> </div>
); );
}; };

View File

@@ -0,0 +1,16 @@
import {FaBuffer, FaPlus} from "react-icons/fa6";
const Menu = () => {
return (
<div className="p-4">
<ul className="menu">
<li className="menu-title">Exploration</li>
<li><a><FaBuffer/>To Review</a></li>
<li className="menu-title mt-4">Catégories</li>
<li><a><i className="fas fa-plus"></i>Item</a></li>
</ul>
</div>
)
}
export default Menu;

View File

@@ -1,103 +1,29 @@
import React from "react"; import {useEffect, useState} from "react";
import {MoonIcon, SunIcon} from "../../assets/icons.jsx";
const themes = [ function ThemeSwitcher() {
"light", "dark", "cupcake" // State to store current theme
]; const [theme, setTheme] = useState("light");
// When theme changes, apply it to <html data-theme="">
useEffect(() => {
document.querySelector("html").setAttribute("data-theme", theme);
}, [theme]);
// Toggle between light and dark
const toggleTheme = () => {
setTheme(theme === "light" ? "dark" : "light");
};
export default function ThemeSwitcher() {
return ( return (
<div title="Change Theme" className="dropdown dropdown-end block"> <button
<div onClick={toggleTheme}
tabIndex={0} className="btn btn-ghost btn-circle"
role="button" >
className="btn group btn-sm gap-1.5 px-1.5 btn-ghost" {theme === "light" ? MoonIcon : SunIcon}
aria-label="Change Theme"
>
<div
className="bg-base-100 group-hover:border-base-content/20 border-base-content/10 grid shrink-0 grid-cols-2 gap-0.5 rounded-md border p-1 transition-colors">
<div className="bg-base-content size-1 rounded-full"></div>
<div className="bg-primary size-1 rounded-full"></div>
<div className="bg-secondary size-1 rounded-full"></div>
<div className="bg-accent size-1 rounded-full"></div>
</div>
<svg
width="12px"
height="12px"
className="mt-px hidden size-2 fill-current opacity-60 sm:inline-block"
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 2048 2048"
>
<path d="M1799 349l242 241-1017 1017L7 590l242-241 775 775 775-775z"></path>
</svg>
</div>
<div </button>
tabIndex={0}
className="dropdown-content bg-base-200 text-base-content rounded-box top-px h-[30.5rem] max-h-[calc(100vh-8.6rem)] overflow-y-auto border-[length:var(--border)] border-white/5 shadow-2xl outline-[length:var(--border)] outline-black/5 mt-16"
>
<ul className="menu w-56">
<li className="menu-title text-xs">Thème</li>
{themes.map((theme) => (
<li key={theme}>
<button
className="gap-3 px-2"
data-set-theme={theme}
data-act-class="[&_svg]:visible"
>
<div
data-theme={theme}
className="bg-base-100 grid shrink-0 grid-cols-2 gap-0.5 rounded-md p-1 shadow-sm"
>
<div className="bg-base-content size-1 rounded-full"></div>
<div className="bg-primary size-1 rounded-full"></div>
<div className="bg-secondary size-1 rounded-full"></div>
<div className="bg-accent size-1 rounded-full"></div>
</div>
<div className="w-32 truncate">{theme}</div>
<svg
xmlns="http://www.w3.org/2000/svg"
width="16"
height="16"
viewBox="0 0 24 24"
fill="currentColor"
className="invisible h-3 w-3 shrink-0"
>
<path d="M20.285 2l-11.285 11.567-5.286-5.011-3.714 3.716 9 8.728 15-15.285z"></path>
</svg>
</button>
</li>
))}
<li>
<a href="/theme-generator/">
<svg
width="24"
height="24"
xmlns="http://www.w3.org/2000/svg"
className="h-4 w-4 fill-current"
viewBox="0 0 512 512"
>
<path d="M96,208H48a16,16,0,0,1,0-32H96a16,16,0,0,1,0,32Z"></path>
<line x1="90.25" y1="90.25" x2="124.19" y2="124.19"></line>
<path
d="M124.19,140.19a15.91,15.91,0,0,1-11.31-4.69L78.93,101.56a16,16,0,0,1,22.63-22.63l33.94,33.95a16,16,0,0,1-11.31,27.31Z"></path>
<path d="M192,112a16,16,0,0,1-16-16V48a16,16,0,0,1,32,0V96A16,16,0,0,1,192,112Z"></path>
<line x1="293.89" y1="90.25" x2="259.95" y2="124.19"></line>
<path
d="M260,140.19a16,16,0,0,1-11.31-27.31l33.94-33.95a16,16,0,0,1,22.63,22.63L271.27,135.5A15.94,15.94,0,0,1,260,140.19Z"></path>
<line x1="124.19" y1="259.95" x2="90.25" y2="293.89"></line>
<path
d="M90.25,309.89a16,16,0,0,1-11.32-27.31l33.95-33.94a16,16,0,0,1,22.62,22.63l-33.94,33.94A16,16,0,0,1,90.25,309.89Z"></path>
<path
d="M219,151.83a26,26,0,0,0-36.77,0l-30.43,30.43a26,26,0,0,0,0,36.77L208.76,276a4,4,0,0,0,5.66,0L276,214.42a4,4,0,0,0,0-5.66Z"></path>
<path
d="M472.31,405.11,304.24,237a4,4,0,0,0-5.66,0L237,298.58a4,4,0,0,0,0,5.66L405.12,472.31a26,26,0,0,0,36.76,0l30.43-30.43h0A26,26,0,0,0,472.31,405.11Z"></path>
</svg>
<div className="grow text-sm font-bold">Créer votre thème!</div>
</a>
</li>
</ul>
</div>
</div>
); );
} }
export default ThemeSwitcher;

View File

@@ -7,6 +7,9 @@ WORKDIR /app
RUN apt-get update && apt-get install -y --no-install-recommends \ RUN apt-get update && apt-get install -y --no-install-recommends \
libmagic1 \ libmagic1 \
file \ file \
pandoc \
ghostscript \
texlive-xetex \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
# Copy requirements and install dependencies # Copy requirements and install dependencies

View File

@@ -5,9 +5,14 @@ email-validator==2.3.0
fastapi==0.116.1 fastapi==0.116.1
httptools==0.6.4 httptools==0.6.4
motor==3.7.1 motor==3.7.1
pymongo==4.15.0 pillow==11.3.0
pydantic==2.11.9 pydantic==2.11.9
PyJWT==2.10.1
pymongo==4.15.0
pypandoc==1.15
python-multipart==0.0.20
redis==6.4.0 redis==6.4.0
reportlab==4.4.4
uvicorn==0.35.0 uvicorn==0.35.0
python-magic==0.4.27 python-magic==0.4.27
watchdog==6.0.0 watchdog==6.0.0

View File

View File

@@ -0,0 +1,102 @@
import subprocess
import uuid
from pathlib import Path
import magic # python-magic
from tasks.common.pdf_converter import TextToPdfConverter, ImageToPdfConverter, WordToPdfConverter
class UnsupportedFileTypeError(Exception):
    """Raised when a file's detected type is not one of the supported kinds."""
def generate_uuid_filename() -> str:
    """Return a new random UUID4 as a string, suitable as a unique file name stem."""
    unique_id = uuid.uuid4()
    return str(unique_id)
def detect_file_type(file_path: str) -> str:
    """
    Classify a file by its MIME type as reported by python-magic.

    Args:
        file_path (str): Path to the file to inspect.

    Returns:
        str: ``'text'`` for any ``text/*`` MIME type, ``'image'`` for any
        ``image/*`` MIME type, ``'word'`` for the .docx MIME type.

    Raises:
        UnsupportedFileTypeError: If the MIME type matches none of the above.
    """
    docx_mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    detected = magic.from_file(file_path, mime=True)

    # Prefix-based families first, in the same order as before: text, then image.
    for prefix, kind in (("text/", "text"), ("image/", "image")):
        if detected.startswith(prefix):
            return kind

    if detected == docx_mime:
        return "word"

    raise UnsupportedFileTypeError(f"Unsupported file type: {detected}")
def compress_pdf(input_pdf: str, output_pdf: str, quality: str = "ebook") -> None:
    """
    Compress a PDF using Ghostscript.

    Args:
        input_pdf (str): Path to the input PDF.
        output_pdf (str): Path to save the compressed PDF.
        quality (str): Ghostscript PDFSETTINGS option: screen, ebook, printer,
            prepress (or default).

    Raises:
        FileNotFoundError: If input PDF does not exist.
        ValueError: If ``quality`` is not a known PDFSETTINGS preset.
        RuntimeError: If the ``gs`` executable cannot be started or Ghostscript
            returns a non-zero exit code.
    """
    input_path = Path(input_pdf)
    output_path = Path(output_pdf)

    if not input_path.exists():
        raise FileNotFoundError(f"Input PDF not found: {input_pdf}")

    # Reject unknown presets up front instead of passing an arbitrary string
    # to Ghostscript as part of a command-line switch.
    allowed_qualities = ("screen", "ebook", "printer", "prepress", "default")
    if quality not in allowed_qualities:
        raise ValueError(
            f"Unsupported quality {quality!r}; expected one of {', '.join(allowed_qualities)}"
        )

    cmd = [
        "gs",
        "-sDEVICE=pdfwrite",
        "-dCompatibilityLevel=1.4",
        f"-dPDFSETTINGS=/{quality}",
        "-dNOPAUSE",
        "-dQUIET",
        "-dBATCH",
        f"-sOutputFile={output_path}",
        str(input_path),
    ]

    try:
        # Capture output so the failure reason can be reported to the caller.
        result = subprocess.run(
            cmd, capture_output=True, text=True, errors="replace", check=False
        )
    except FileNotFoundError as err:
        # A missing `gs` binary must not be confused with a missing input PDF.
        raise RuntimeError("Ghostscript executable 'gs' was not found") from err

    if result.returncode != 0:
        details = (result.stderr or result.stdout or "").strip()
        message = f"Ghostscript failed with return code {result.returncode}"
        if details:
            message = f"{message}: {details}"
        raise RuntimeError(message)
def convert_to_pdf(filepath: str, output_dir: str = ".") -> str:
    """
    Convert any supported file to PDF.

    The file type is determined with ``detect_file_type`` and the matching
    converter class is instantiated and run.

    Args:
        filepath (str): Path to the input file.
        output_dir (str): Directory to save the output PDF.

    Returns:
        str: Path to the generated PDF.

    Raises:
        UnsupportedFileTypeError: If the input file type is not supported.
    """
    # NOTE(review): this module imports the converter classes from
    # tasks.common.pdf_converter at import time, and that module appears to
    # import generate_uuid_filename from this one. That looks like a circular
    # import; confirm both modules can actually be imported.
    file_type = detect_file_type(filepath)

    converters = {
        "text": TextToPdfConverter,
        "image": ImageToPdfConverter,
        "word": WordToPdfConverter,
    }
    converter_cls = converters.get(file_type)
    if converter_cls is None:
        # Defensive only: detect_file_type already raises for unknown types.
        # Uses the documented exception type rather than ValueError.
        raise UnsupportedFileTypeError(f"Unsupported file type: {file_type}")

    return converter_cls(filepath, output_dir=output_dir).convert()

View File

@@ -0,0 +1,83 @@
from abc import ABC, abstractmethod
from pathlib import Path
import pypandoc
from PIL import Image
from reportlab.lib.pagesizes import A4
from reportlab.pdfgen import canvas
from tasks.common.converter_utils import generate_uuid_filename
class BaseConverter(ABC):
    """Common base for converters that turn one input file into a PDF."""

    def __init__(self, input_path: str, output_dir: str = ".") -> None:
        """Store the input path and choose a unique output path.

        Args:
            input_path (str): Path to the file to convert.
            output_dir (str): Directory that will receive the PDF.
        """
        self.input_path = Path(input_path)
        self.output_dir = Path(output_dir)
        # The output file name is a fresh UUID, so repeated conversions into
        # the same directory get distinct names.
        pdf_name = f"{generate_uuid_filename()}.pdf"
        self.output_path = self.output_dir.joinpath(pdf_name)

    @abstractmethod
    def convert(self) -> str:
        """Convert the input file to PDF and return the output path."""
class TextToPdfConverter(BaseConverter):
    """Converter for text files to PDF.

    Each input line is drawn as one line on an A4 page, starting 50 units
    below the top edge and moving down 15 units per line. A new page is
    started once the cursor drops below 50 units from the bottom.
    Lines are not wrapped.
    """

    def convert(self) -> str:
        """Render the input text file to ``self.output_path``.

        Returns:
            str: Path to the generated PDF.
        """
        margin = 50
        line_step = 15

        c = canvas.Canvas(str(self.output_path), pagesize=A4)
        _, height = A4

        # errors="replace": the input is not guaranteed to be valid UTF-8, so
        # undecodable bytes are replaced instead of aborting the conversion
        # with UnicodeDecodeError.
        with open(self.input_path, "r", encoding="utf-8", errors="replace") as f:
            y = height - margin
            for line in f:
                # strip() removes the newline and also any leading indentation.
                c.drawString(margin, y, line.strip())
                y -= line_step
                if y < margin:
                    c.showPage()
                    y = height - margin

        c.save()
        return str(self.output_path)
class ImageToPdfConverter(BaseConverter):
    """Converter for image files to PDF."""

    def convert(self) -> str:
        """Save the input image, converted to RGB, at ``self.output_path``.

        Returns:
            str: Path to the generated PDF.
        """
        # The context manager closes the underlying file handle, which was
        # previously left open after conversion.
        with Image.open(self.input_path) as image:
            rgb_image = image.convert("RGB")
            rgb_image.save(self.output_path)
        return str(self.output_path)
class WordToPdfConverter(BaseConverter):
    """Converter for Word files (.docx) to PDF using pypandoc."""

    def convert(self) -> str:
        """Have pypandoc write a PDF version of the input file.

        Returns:
            str: Path to the generated PDF.
        """
        source = str(self.input_path)
        destination = str(self.output_path)
        pypandoc.convert_file(source, "pdf", outputfile=destination)
        return destination
# Placeholders for future extensions
class HtmlToPdfConverter(BaseConverter):
    """Placeholder for HTML to PDF converter."""

    def convert(self) -> str:
        """Always raise: HTML conversion is not available yet."""
        message = "HTML to PDF conversion not implemented."
        raise NotImplementedError(message)
class ExcelToPdfConverter(BaseConverter):
    """Placeholder for Excel to PDF converter."""

    def convert(self) -> str:
        """Always raise: Excel conversion is not available yet."""
        message = "Excel to PDF conversion not implemented."
        raise NotImplementedError(message)
class MarkdownToPdfConverter(BaseConverter):
    """Placeholder for Markdown to PDF converter."""

    def convert(self) -> str:
        """Always raise: Markdown conversion is not available yet."""
        message = "Markdown to PDF conversion not implemented."
        raise NotImplementedError(message)

View File

@@ -11,10 +11,20 @@ from typing import Any, Dict
from app.config import settings from app.config import settings
from app.database.connection import get_database from app.database.connection import get_database
from app.services.document_service import DocumentService from app.services.document_service import DocumentService
from app.services.job_service import JobService
from tasks.common.converter_utils import convert_to_pdf
from tasks.main import celery_app from tasks.main import celery_app
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def get_services():
    """Build the services used by the document-processing task.

    Both services share the database handle returned by ``get_database()``.

    Returns:
        tuple: ``(document_service, job_service)``.
    """
    db = get_database()
    documents = DocumentService(database=db, objects_folder=settings.get_objects_folder())
    jobs = JobService(database=db)
    return documents, jobs
@celery_app.task(bind=True, autoretry_for=(Exception,), retry_kwargs={'max_retries': 3, 'countdown': 60}) @celery_app.task(bind=True, autoretry_for=(Exception,), retry_kwargs={'max_retries': 3, 'countdown': 60})
def process_document(self, filepath: str) -> Dict[str, Any]: def process_document(self, filepath: str) -> Dict[str, Any]:
""" """
@@ -38,10 +48,8 @@ def process_document(self, filepath: str) -> Dict[str, Any]:
task_id = self.request.id task_id = self.request.id
logger.info(f"Starting document processing task {task_id} for file: {filepath}") logger.info(f"Starting document processing task {task_id} for file: {filepath}")
database = get_database() # get services
document_service = DocumentService(database=database, objects_folder=settings.get_objects_folder()) document_service, job_service = get_services()
from app.services.job_service import JobService
job_service = JobService(database=database)
job = None job = None
try: try:
@@ -56,7 +64,10 @@ def process_document(self, filepath: str) -> Dict[str, Any]:
job_service.mark_job_as_started(job_id=job.id) job_service.mark_job_as_started(job_id=job.id)
logger.info(f"Job {task_id} marked as PROCESSING") logger.info(f"Job {task_id} marked as PROCESSING")
# Step 4: Mark job as completed # Step 4: Create the pdf version of the document
pdf_file_path = convert_to_pdf(filepath, settings.get_temp_folder())
# Step x: Mark job as completed
job_service.mark_job_as_completed(job_id=job.id) job_service.mark_job_as_completed(job_id=job.id)
logger.info(f"Job {task_id} marked as COMPLETED") logger.info(f"Job {task_id} marked as COMPLETED")
@@ -82,4 +93,3 @@ def process_document(self, filepath: str) -> Dict[str, Any]:
# Re-raise the exception to trigger Celery retry mechanism # Re-raise the exception to trigger Celery retry mechanism
raise raise

View File

@@ -3,13 +3,19 @@ Celery worker for MyDocManager document processing tasks.
This module contains all Celery tasks for processing documents. This module contains all Celery tasks for processing documents.
""" """
import logging
import os import os
from celery import Celery from celery import Celery
from app.config import settings
# Environment variables # Environment variables
REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/0") REDIS_URL = settings.get_redis_url()
MONGODB_URL = os.getenv("MONGODB_URL", "mongodb://localhost:27017") MONGODB_URL = settings.get_mongodb_url()
logger = logging.getLogger(__name__)
# Initialize Celery app # Initialize Celery app
celery_app = Celery( celery_app = Celery(
@@ -28,9 +34,15 @@ celery_app.conf.update(
timezone="UTC", timezone="UTC",
enable_utc=True, enable_utc=True,
task_track_started=True, task_track_started=True,
task_time_limit=300, # 5 minutes task_time_limit=300, # 5 minutes
task_soft_time_limit=240, # 4 minutes task_soft_time_limit=240, # 4 minutes
) )
if __name__ == "__main__": if __name__ == "__main__":
# initialize temp folder if needed
tmp_folder = settings.get_temp_folder()
if not os.path.exists(tmp_folder):
logger.info(f"Creating temporary folder: {tmp_folder}")
os.makedirs(tmp_folder)
celery_app.start() celery_app.start()

0
tests/common/__init__.py Normal file
View File

View File

@@ -0,0 +1,55 @@
import shutil
import tempfile
from pathlib import Path
import pytest
from tasks.common.pdf_converter import TextToPdfConverter, ImageToPdfConverter, WordToPdfConverter
@pytest.fixture
def temp_dir():
    """Provide a fresh temporary directory and remove it after the test."""
    scratch = tempfile.mkdtemp()
    yield scratch
    shutil.rmtree(scratch)
def test_i_can_convert_text_to_pdf(temp_dir):
    """A two-line text file is converted to an existing ``.pdf`` file."""
    source = Path(temp_dir, "test.txt")
    source.write_text("Hello World!\nThis is a test.")

    result = TextToPdfConverter(str(source), output_dir=temp_dir).convert()

    assert Path(result).exists()
    assert result.endswith(".pdf")
def test_i_can_convert_image_to_pdf(temp_dir):
    """A small red PNG is converted to an existing ``.pdf`` file."""
    from PIL import Image

    source = Path(temp_dir, "image.png")
    Image.new("RGB", (100, 100), color="red").save(source)

    result = ImageToPdfConverter(str(source), output_dir=temp_dir).convert()

    assert Path(result).exists()
    assert result.endswith(".pdf")
def test_i_can_convert_word_to_pdf(temp_dir):
    """A one-paragraph .docx file is converted to an existing ``.pdf`` file."""
    import docx

    source = Path(temp_dir, "document.docx")
    document = docx.Document()
    document.add_paragraph("Hello Word!")
    document.save(source)

    result = WordToPdfConverter(str(source), output_dir=temp_dir).convert()

    assert Path(result).exists()
    assert result.endswith(".pdf")

View File

@@ -0,0 +1,52 @@
import shutil
import tempfile
from pathlib import Path
import pytest
from tasks.common.converter_utils import detect_file_type, UnsupportedFileTypeError
@pytest.fixture
def temp_dir():
    """Provide a fresh temporary directory and remove it after the test."""
    scratch = tempfile.mkdtemp()
    yield scratch
    shutil.rmtree(scratch)
def test_i_can_detect_text_file(temp_dir):
    """A plain-text file is classified as ``"text"``."""
    sample = Path(temp_dir, "sample.txt")
    sample.write_text("Sample text content")

    assert detect_file_type(str(sample)) == "text"
def test_i_can_detect_image_file(temp_dir):
    """A generated JPEG is classified as ``"image"``."""
    from PIL import Image

    sample = Path(temp_dir, "sample.jpg")
    Image.new("RGB", (50, 50), color="blue").save(sample)

    assert detect_file_type(str(sample)) == "image"
def test_i_can_detect_word_file(temp_dir):
    """A generated .docx document is classified as ``"word"``."""
    import docx

    sample = Path(temp_dir, "sample.docx")
    document = docx.Document()
    document.add_paragraph("Sample content")
    document.save(sample)

    assert detect_file_type(str(sample)) == "word"
def test_i_cannot_detect_unsupported_file(temp_dir):
    """A file starting with an ``MZ`` header is rejected as unsupported."""
    sample = Path(temp_dir, "sample.exe")
    sample.write_bytes(b'\x4D\x5A\x90\x00\x03\x00\x00\x00')

    with pytest.raises(UnsupportedFileTypeError):
        detect_file_type(str(sample))