Compare commits
6 Commits
f5e909463a
...
477d6bf538
| Author | SHA1 | Date | |
|---|---|---|---|
| 477d6bf538 | |||
| 79bfae4ba8 | |||
| 8ae9754fde | |||
| bd52f2d296 | |||
| 62c7e46a88 | |||
| 06549c0d02 |
32
Makefile
Normal file
32
Makefile
Normal file
@@ -0,0 +1,32 @@
|
||||
# Host directories that docker-compose bind-mounts into the containers.
# errors/ and ignored/ are mounted alongside watched_files/ and objects/, so
# they must exist and be writable by the container user as well.
VOLUME_DIRS := ./volumes/watched_files ./volumes/objects ./volumes/errors ./volumes/ignored

.PHONY: init up down restart logs clean rebuild

# Create the bind-mounted directories and hand them to UID/GID 1002, the user
# the application image runs as.
# NOTE(review): chown to another UID normally requires root - confirm how this
# target is expected to be invoked.
init:
	@echo "Creating directories and setting permissions..."
	@mkdir -p $(VOLUME_DIRS)
	@chown -R 1002:1002 $(VOLUME_DIRS)
	@echo "✓ Directories initialized"

# Start all services in the background (directories are prepared first).
up: init
	@echo "Starting services..."
	@docker-compose up -d
	@echo "✓ Services started"

# Stop and remove the containers.
down:
	@docker-compose down

# Restart the running containers without recreating them.
restart:
	@docker-compose restart

# Follow the logs of all services.
logs:
	@docker-compose logs -f

# Stop the services and delete every bind-mounted volume directory.
clean: down
	@echo "Cleaning volumes..."
	@sudo rm -rf ./volumes
	@echo "✓ Volumes cleaned"

# Wipe the volumes, rebuild the images from scratch and start the services.
rebuild: clean init
	@echo "Rebuilding images..."
	@docker-compose build --no-cache
	@docker-compose up -d
	@echo "✓ Services rebuilt and started"
|
||||
@@ -40,6 +40,8 @@ services:
|
||||
- ./src/worker/tasks:/app/tasks # <- Added: shared access to worker tasks
|
||||
- ./volumes/watched_files:/watched_files
|
||||
- ./volumes/objects:/objects
|
||||
- ./volumes/errors:/errors
|
||||
- ./volumes/ignored:/ignored
|
||||
depends_on:
|
||||
- redis
|
||||
- mongodb
|
||||
@@ -61,6 +63,9 @@ services:
|
||||
- ./src/worker:/app
|
||||
- ./src/file-processor/app:/app/app # <- Added: shared access file-processor app
|
||||
- ./volumes/watched_files:/watched_files
|
||||
- ./volumes/objects:/objects
|
||||
- ./volumes/errors:/errors
|
||||
- ./volumes/ignored:/ignored
|
||||
depends_on:
|
||||
- redis
|
||||
- mongodb
|
||||
|
||||
@@ -13,6 +13,7 @@ click-didyoumean==0.3.1
|
||||
click-plugins==1.1.1.2
|
||||
click-repl==0.3.0
|
||||
cryptography==46.0.1
|
||||
Deprecated==1.2.18
|
||||
dnspython==2.8.0
|
||||
ecdsa==0.19.1
|
||||
email-validator==2.3.0
|
||||
@@ -32,6 +33,7 @@ mongomock==4.3.0
|
||||
mongomock-motor==0.0.36
|
||||
motor==3.7.1
|
||||
packaging==25.0
|
||||
pikepdf==9.11.0
|
||||
pillow==11.3.0
|
||||
pipdeptree==2.28.0
|
||||
pluggy==1.6.0
|
||||
@@ -44,6 +46,7 @@ pydantic_core==2.33.2
|
||||
Pygments==2.19.2
|
||||
PyJWT==2.10.1
|
||||
pymongo==4.15.1
|
||||
PyMuPDF==1.26.4
|
||||
pypandoc==1.15
|
||||
pytest==8.4.2
|
||||
pytest-asyncio==1.2.0
|
||||
@@ -72,4 +75,5 @@ watchdog==6.0.0
|
||||
watchfiles==1.1.0
|
||||
wcwidth==0.2.13
|
||||
websockets==15.0.1
|
||||
wrapt==1.17.3
|
||||
zipp==3.23.0
|
||||
|
||||
@@ -12,10 +12,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
texlive-xetex \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
|
||||
# Copy requirements and install dependencies
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Change the user
|
||||
USER 1002:1002
|
||||
|
||||
# Copy application code
|
||||
COPY . .
|
||||
|
||||
@@ -24,5 +28,6 @@ ENV PYTHONPATH=/app
|
||||
# Expose port
|
||||
EXPOSE 8000
|
||||
|
||||
|
||||
# Command will be overridden by docker-compose
|
||||
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
@@ -9,6 +9,7 @@ from app.database.connection import get_database
|
||||
from app.models.auth import UserRole
|
||||
from app.models.user import UserInDB
|
||||
from app.services.auth_service import AuthService
|
||||
from app.services.document_service import DocumentService
|
||||
from app.services.user_service import UserService
|
||||
|
||||
security = HTTPBearer()
|
||||
@@ -25,6 +26,12 @@ def get_user_service() -> UserService:
|
||||
return UserService(database)
|
||||
|
||||
|
||||
def get_document_service() -> DocumentService:
    """
    Build a DocumentService bound to the application database.

    Returns:
        DocumentService: Service created on top of the handle returned by
        get_database().
    """
    return DocumentService(get_database())
|
||||
|
||||
|
||||
def get_current_user(
|
||||
credentials: HTTPAuthorizationCredentials = Depends(security),
|
||||
user_service: UserService = Depends(get_user_service)
|
||||
@@ -79,7 +86,7 @@ def get_current_user(
|
||||
return user
|
||||
|
||||
|
||||
def get_admin_user(current_user: UserInDB = Depends(get_current_user)) -> UserInDB:
|
||||
def get_admin_user(current_user: UserInDB = Depends(get_current_user)) -> UserInDB:
|
||||
"""
|
||||
Dependency to ensure current user has admin role.
|
||||
|
||||
|
||||
241
src/file-processor/app/api/routes/document.py
Normal file
241
src/file-processor/app/api/routes/document.py
Normal file
@@ -0,0 +1,241 @@
|
||||
"""
|
||||
Document API routes.
|
||||
|
||||
This module provides REST endpoints for document management operations.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from typing import List, Optional
|
||||
|
||||
import fitz # PyMuPDF
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, status, Path
|
||||
from starlette.responses import Response
|
||||
|
||||
from app.api.dependencies import get_document_service, get_current_user
|
||||
from app.models.document import DocumentResponse, FileDocument
|
||||
from app.services.document_service import DocumentService
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(tags=["Documents"])
|
||||
|
||||
|
||||
def _count_pdf_pages(pdf_file_path: str) -> int:
    """
    Return how many pages the PDF at the given path contains.

    Args:
        pdf_file_path: Location of the PDF file on disk

    Returns:
        The page count reported by PyMuPDF, or 0 when the file cannot be
        opened or read (the failure is logged as a warning)
    """
    try:
        with fitz.open(pdf_file_path) as pdf:
            total_pages = pdf.page_count
    except Exception as exc:
        # Best effort: an unreadable PDF must not break the listing.
        logger.warning(f"Could not count pages for PDF {pdf_file_path}: {exc}")
        return 0
    return total_pages
|
||||
|
||||
|
||||
def _build_object_url(file_hash: Optional[str]) -> Optional[str]:
|
||||
"""
|
||||
Build object URL from file hash.
|
||||
|
||||
Args:
|
||||
file_hash: SHA256 hash of the file
|
||||
|
||||
Returns:
|
||||
URL string or None if hash is not provided
|
||||
"""
|
||||
if not file_hash:
|
||||
return None
|
||||
return f"/api/objects/{file_hash}"
|
||||
|
||||
|
||||
def _extract_metadata_field(metadata: dict, field_name: str) -> List[str]:
|
||||
"""
|
||||
Extract a list field from metadata dictionary.
|
||||
|
||||
Args:
|
||||
metadata: Document metadata dictionary
|
||||
field_name: Name of the field to extract
|
||||
|
||||
Returns:
|
||||
List of strings, empty list if field doesn't exist or is not a list
|
||||
"""
|
||||
field_value = metadata.get(field_name, [])
|
||||
if isinstance(field_value, list):
|
||||
return [str(item) for item in field_value]
|
||||
return []
|
||||
|
||||
|
||||
def _map_file_document_to_response(
        document: FileDocument,
        document_service: DocumentService
) -> DocumentResponse:
    """
    Convert a stored FileDocument into the DocumentResponse sent to clients.

    Args:
        document: FileDocument instance from database
        document_service: Service used to locate the stored PDF on disk

    Returns:
        DocumentResponse built from the document's fields, with object URLs
        derived from its hashes and a page count taken from its PDF
    """
    pdf_hash = document.pdf_file_hash

    # The PDF is only opened when a hash is recorded and the object is on disk.
    if pdf_hash and document_service.exists(pdf_hash):
        page_count = _count_pdf_pages(document_service.get_document_path(pdf_hash))
    else:
        page_count = 0

    detected_at = document.detected_at
    metadata = document.metadata

    payload = {
        "id": str(document.id),
        "name": document.filename,
        "original_file_type": document.file_type.value.upper(),
        # Empty string when no detection timestamp was recorded
        "created_at": detected_at.isoformat() if detected_at else "",
        "file_size": document.file_size,
        "page_count": page_count,
        "thumbnail_url": _build_object_url(document.thumbnail_file_hash),
        "pdf_url": _build_object_url(pdf_hash),
        "tags": _extract_metadata_field(metadata, "tags"),
        "categories": _extract_metadata_field(metadata, "categories")
    }
    logger.info(f"Document: {payload}")

    return DocumentResponse(**payload)
|
||||
|
||||
|
||||
@router.get("/documents", response_model=List[DocumentResponse])
def list_documents(
        skip: int = Query(0, ge=0, description="Number of documents to skip"),
        limit: int = Query(100, ge=1, le=1000, description="Maximum number of documents to return"),
        UserInDB=Depends(get_current_user),
        document_service: DocumentService = Depends(get_document_service)
) -> List[DocumentResponse]:
    """
    Retrieve a paginated list of documents.

    Args:
        skip: Number of documents to skip for pagination
        limit: Maximum number of documents to return
        UserInDB: Value injected by the get_current_user dependency. It is
            not used in the body; declaring it makes the route depend on
            get_current_user.
        document_service: Document service instance

    Returns:
        List of documents in API response format

    Raises:
        HTTPException: 500 if listing or mapping the documents fails
    """
    try:
        documents = document_service.list_documents(skip=skip, limit=limit)
        return [
            _map_file_document_to_response(doc, document_service)
            for doc in documents
        ]
    except Exception as e:
        # logger.exception keeps the traceback; the client only ever sees the
        # generic message below.
        logger.exception(f"Failed to list documents: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to retrieve documents"
        ) from e
|
||||
|
||||
|
||||
@router.get("/objects/{file_hash}")
async def get_object_by_hash(
        file_hash: str = Path(..., description="SHA256 hash of the object to retrieve"),
        document_service: DocumentService = Depends(get_document_service)
):
    """
    Serve object content by its hash.

    This endpoint serves files (original documents, PDFs, thumbnails) by their
    SHA256 hash. It supports all file types stored in the objects folder.

    Args:
        file_hash: SHA256 hash of the object
        document_service: Document service dependency

    Returns:
        Response carrying the object's bytes, with the media type detected
        from the content and Content-Length / Cache-Control headers

    Raises:
        HTTPException: 404 if the object is unknown, missing on disk or has no
            content; 500 if reading or serving it fails
    """
    try:
        # Check if object exists
        if not document_service.exists(file_hash):
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Object not found"
            )

        # Get file path
        file_path = document_service.get_document_path(file_hash)

        # Verify file exists on disk
        if not os.path.exists(file_path):
            logger.error(f"Object {file_hash} registered but file not found at {file_path}")
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Object file not found on disk"
            )

        try:
            file_content = document_service.get_document_content_by_hash(file_hash)
            if not file_content:
                raise HTTPException(
                    status_code=status.HTTP_404_NOT_FOUND,
                    detail="Object content not available"
                )

            # Detect MIME type from the content itself
            import magic
            mime_type = magic.from_buffer(file_content, mime=True)

            # Return file content with appropriate headers
            return Response(
                content=file_content,
                media_type=mime_type,
                headers={
                    "Content-Length": str(len(file_content)),
                    "Cache-Control": "public, max-age=3600"  # Cache for 1 hour
                }
            )

        except HTTPException:
            # HTTPException is an Exception subclass: without this clause the
            # generic handler below would turn the 404 above into a 500.
            raise
        except Exception as e:
            logger.exception(f"Error reading object content for hash {file_hash}: {str(e)}")
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="Failed to read object content"
            ) from e

    except HTTPException:
        # Re-raise HTTP exceptions as-is
        raise
    except Exception as e:
        logger.exception(f"Unexpected error serving object {file_hash}: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Internal server error while serving object"
        ) from e
|
||||
@@ -105,4 +105,14 @@ def get_watch_folder() -> str:
|
||||
|
||||
def get_temp_folder() -> str:
|
||||
"""Directory to store temporary files"""
|
||||
return os.getenv("TEMP_DIRECTORY", "/temp")
|
||||
return os.getenv("TEMP_DIRECTORY", "/tmp")
|
||||
|
||||
|
||||
def get_errors_folder() -> str:
    """Directory used as the errors folder (ERRORS_DIRECTORY, default "/errors")"""
    return os.environ.get("ERRORS_DIRECTORY", "/errors")
|
||||
|
||||
|
||||
def get_ignored_folder() -> str:
    """Directory used as the ignored folder (IGNORED_DIRECTORY, default "/ignored")"""
    return os.environ.get("IGNORED_DIRECTORY", "/ignored")
|
||||
|
||||
@@ -4,7 +4,7 @@ MongoDB database connection management.
|
||||
This module handles MongoDB connection with fail-fast approach.
|
||||
The application will terminate if MongoDB is not accessible at startup.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import sys
|
||||
from typing import Optional
|
||||
|
||||
@@ -13,11 +13,14 @@ from pymongo.database import Database
|
||||
from pymongo.errors import ConnectionFailure, ServerSelectionTimeoutError
|
||||
|
||||
from app.config.settings import get_mongodb_url, get_mongodb_database_name
|
||||
from app.utils.security import safe_connection_string
|
||||
|
||||
# Global variables for singleton pattern
|
||||
_client: Optional[MongoClient] = None
|
||||
_database: Optional[Database] = None
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def create_mongodb_client() -> MongoClient:
|
||||
"""
|
||||
@@ -43,16 +46,16 @@ def create_mongodb_client() -> MongoClient:
|
||||
# Test connection by running admin command
|
||||
client.admin.command('ping')
|
||||
|
||||
print(f"Successfully connected to MongoDB at {mongodb_url}")
|
||||
logger.info(f"Successfully connected to MongoDB at {safe_connection_string(mongodb_url)}")
|
||||
return client
|
||||
|
||||
except (ConnectionFailure, ServerSelectionTimeoutError) as e:
|
||||
print(f"ERROR: Failed to connect to MongoDB at {mongodb_url}")
|
||||
print(f"Connection error: {str(e)}")
|
||||
print("MongoDB is required for this application. Please ensure MongoDB is running and accessible.")
|
||||
logger.error(f"ERROR: Failed to connect to MongoDB at {safe_connection_string(mongodb_url)}")
|
||||
logger.error(f"Connection error: {str(e)}")
|
||||
logger.error("MongoDB is required for this application. Please ensure MongoDB is running and accessible.")
|
||||
sys.exit(1)
|
||||
except Exception as e:
|
||||
print(f"ERROR: Unexpected error connecting to MongoDB: {str(e)}")
|
||||
logger.error(f"ERROR: Unexpected error connecting to MongoDB: {str(e)}")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
@@ -74,7 +77,7 @@ def get_database() -> Database:
|
||||
|
||||
database_name = get_mongodb_database_name()
|
||||
_database = _client[database_name]
|
||||
print(f"Connected to database: {database_name}")
|
||||
logger.info(f"Connected to database: {database_name}")
|
||||
|
||||
return _database
|
||||
|
||||
@@ -92,7 +95,7 @@ def close_database_connection():
|
||||
_client.close()
|
||||
_client = None
|
||||
_database = None
|
||||
print("MongoDB connection closed")
|
||||
logger.info("MongoDB connection closed")
|
||||
|
||||
|
||||
def get_mongodb_client() -> Optional[MongoClient]:
|
||||
|
||||
@@ -130,6 +130,47 @@ class FileDocumentRepository:
|
||||
except PyMongoError:
|
||||
return None
|
||||
|
||||
def find_document_with_pdf_hash(self, file_hash: str) -> Optional[FileDocument]:
    """
    Look up a document by content hash that already has a PDF hash recorded.

    Args:
        file_hash (str): SHA256 hash of file content

    Returns:
        FileDocument or None: A document whose file_hash matches and whose
        pdf_file_hash is not None; None when there is no such document or
        the query raises PyMongoError
    """
    query = {
        "file_hash": file_hash,
        "pdf_file_hash": {"$ne": None},
    }
    try:
        record = self.collection.find_one(query)
        return FileDocument(**record) if record else None
    except PyMongoError:
        return None
|
||||
|
||||
def find_same_document(self, filename: str, file_hash: str) -> Optional[FileDocument]:
    """
    Find a document with the same filename and the same file hash.

    Args:
        filename (str): Filename to match exactly
        file_hash (str): SHA256 hash of file content

    Returns:
        FileDocument or None: File document if found; None when nothing
        matches or the query raises PyMongoError
    """
    try:
        file_doc = self.collection.find_one({"file_hash": file_hash,
                                             "filename": filename})
        if file_doc:
            return FileDocument(**file_doc)
        return None

    except PyMongoError:
        return None
|
||||
|
||||
def find_document_by_filepath(self, filepath: str) -> Optional[FileDocument]:
|
||||
"""
|
||||
Find file document by exact filepath.
|
||||
|
||||
@@ -30,7 +30,7 @@ class DocumentFileEventHandler(FileSystemEventHandler):
|
||||
dispatching Celery tasks, and managing processing jobs.
|
||||
"""
|
||||
|
||||
SUPPORTED_EXTENSIONS = {'.txt', '.pdf', '.docx'}
|
||||
SUPPORTED_EXTENSIONS = {'.txt', '.pdf', '.docx', '.jpg', '.png', '.jpeg'}
|
||||
|
||||
def __init__(self, document_service: DocumentService, job_service: JobService):
|
||||
"""
|
||||
@@ -59,6 +59,7 @@ class DocumentFileEventHandler(FileSystemEventHandler):
|
||||
|
||||
if file_extension not in self.SUPPORTED_EXTENSIONS:
|
||||
logger.info(f"Ignoring unsupported file type: {filepath}")
|
||||
self.document_service.move_to_ignored(filepath, "unsupported file type")
|
||||
return
|
||||
|
||||
logger.info(f"Processing new file: {filepath}")
|
||||
|
||||
@@ -17,6 +17,7 @@ from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
from app.api.routes.auth import router as auth_router
|
||||
from app.api.routes.users import router as users_router
|
||||
from app.api.routes.document import router as documents_router
|
||||
from app.config import settings
|
||||
from app.database.connection import get_database
|
||||
from app.file_watcher import create_file_watcher, FileWatcher
|
||||
@@ -111,7 +112,7 @@ app.add_middleware(
|
||||
# Include routers
|
||||
app.include_router(auth_router, prefix="/auth", tags=["Authentication"])
|
||||
app.include_router(users_router, prefix="/users", tags=["User Management"])
|
||||
# app.include_router(documents_router, prefix="/documents", tags=["Documents"])
|
||||
app.include_router(documents_router, prefix="/api", tags=["Documents"])
|
||||
# app.include_router(jobs_router, prefix="/jobs", tags=["Processing Jobs"])
|
||||
|
||||
|
||||
|
||||
@@ -7,10 +7,9 @@ stored in MongoDB collections.
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, Optional
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from bson import ObjectId
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
from pydantic import BaseModel, Field, field_validator, ConfigDict
|
||||
|
||||
from app.models.types import PyObjectId
|
||||
|
||||
@@ -49,6 +48,8 @@ class FileDocument(BaseModel):
|
||||
metadata: Dict[str, Any] = Field(default_factory=dict, description="File-specific metadata")
|
||||
detected_at: Optional[datetime] = Field(default=None, description="Timestamp when file was detected")
|
||||
file_hash: Optional[str] = Field(default=None, description="SHA256 hash of file content")
|
||||
pdf_file_hash: Optional[str] = Field(default=None, description="SHA256 hash of the associated pdf file content")
|
||||
thumbnail_file_hash: Optional[str] = Field(default=None, description="SHA256 hash of the thumbnail")
|
||||
encoding: str = Field(default="utf-8", description="Character encoding for text files")
|
||||
file_size: int = Field(..., ge=0, description="File size in bytes")
|
||||
mime_type: str = Field(..., description="MIME type detected")
|
||||
@@ -68,3 +69,28 @@ class FileDocument(BaseModel):
|
||||
if not v.strip():
|
||||
raise ValueError("Filename cannot be empty")
|
||||
return v.strip()
|
||||
|
||||
|
||||
def _snake_to_camel(field_name: str) -> str:
    """Turn a snake_case name into camelCase, leaving the first word as written."""
    first_word, *other_words = field_name.split('_')
    return first_word + ''.join(word.capitalize() for word in other_words)


class DocumentResponse(BaseModel):
    """
    Shape of a document as returned by the document API endpoints.

    Every field gets a camelCase alias generated from its snake_case name,
    and populate_by_name=True keeps the snake_case names usable when
    constructing the model.
    """

    model_config = ConfigDict(alias_generator=_snake_to_camel, populate_by_name=True)

    id: str = Field(..., description="Document unique identifier")
    name: str = Field(..., description="Document filename")
    original_file_type: str = Field(..., description="Original file type before conversion")
    created_at: str = Field(..., description="ISO timestamp when document was created")
    file_size: int = Field(..., description="File size in bytes")
    page_count: int = Field(..., description="Number of pages in the document")
    thumbnail_url: Optional[str] = Field(default=None, description="URL to document thumbnail")
    pdf_url: Optional[str] = Field(default=None, description="URL to PDF version of document")
    tags: List[str] = Field(default_factory=list, description="Document tags")
    categories: List[str] = Field(default_factory=list, description="Document categories")
|
||||
|
||||
@@ -14,6 +14,9 @@ class ProcessingStatus(str, Enum):
|
||||
PENDING = "pending"
|
||||
PROCESSING = "processing"
|
||||
COMPLETED = "completed"
|
||||
SAVING_OBJECT = "saving_object"
|
||||
SAVING_PDF = "saving_pdf"
|
||||
CREATING_THUMBNAIL = "creating_thumbnail"
|
||||
FAILED = "failed"
|
||||
|
||||
|
||||
|
||||
@@ -6,7 +6,9 @@ while maintaining data consistency through MongoDB transactions.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Dict, Any
|
||||
@@ -14,13 +16,28 @@ from typing import List, Optional, Dict, Any
|
||||
import magic
|
||||
from pymongo.errors import PyMongoError
|
||||
|
||||
from app.config.settings import get_objects_folder
|
||||
from app.config.settings import get_objects_folder, get_temp_folder, get_errors_folder, get_ignored_folder
|
||||
from app.database.repositories.document_repository import FileDocumentRepository
|
||||
from app.models.document import (
|
||||
FileDocument,
|
||||
FileType,
|
||||
)
|
||||
from app.models.types import PyObjectId
|
||||
from app.utils.pdf_converter import convert_to_pdf
|
||||
from app.utils.pdf_thumbmail import PDFThumbnailGenerator
|
||||
from app.utils.security import generate_uuid_filename
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DocumentAlreadyExists(Exception):
    """Raised when a document with the same filename and hash is already stored."""

    def __init__(self, message):
        # Initialise the base class explicitly so args and str() carry the
        # message even when it is passed as a keyword argument.
        super().__init__(message)
        self.message = message
|
||||
|
||||
|
||||
class DocumentProcessingError(Exception):
    """Raised when a processing step on a document cannot be completed."""

    def __init__(self, message):
        # Initialise the base class explicitly so args and str() carry the
        # message even when it is passed as a keyword argument.
        super().__init__(message)
        self.message = message
|
||||
|
||||
|
||||
class DocumentService:
|
||||
@@ -31,7 +48,11 @@ class DocumentService:
|
||||
and their content while ensuring data consistency through transactions.
|
||||
"""
|
||||
|
||||
def __init__(self, database, objects_folder: str = None):
|
||||
def __init__(self, database,
|
||||
objects_folder: str = None,
|
||||
temp_folder: str = None,
|
||||
errors_folder: str = None,
|
||||
ignored_folder: str = None):
|
||||
"""
|
||||
Initialize the document service with repository dependencies.
|
||||
|
||||
@@ -43,6 +64,9 @@ class DocumentService:
|
||||
self.db = database
|
||||
self.document_repository = FileDocumentRepository(self.db)
|
||||
self.objects_folder = objects_folder or get_objects_folder()
|
||||
self.temp_folder = temp_folder or get_temp_folder()
|
||||
self.errors_folder = errors_folder or get_errors_folder()
|
||||
self.ignored_folder = ignored_folder or get_ignored_folder()
|
||||
|
||||
def initialize(self):
|
||||
self.document_repository.initialize()
|
||||
@@ -117,7 +141,40 @@ class DocumentService:
|
||||
|
||||
return path.read_bytes()
|
||||
|
||||
def _get_document_path(self, file_hash):
|
||||
@staticmethod
|
||||
def _get_safe_path(file_path):
|
||||
"""
|
||||
If the path already exists, add a suffix to the filename.
|
||||
Increment the suffix until a safe path is found.
|
||||
:param file_path:
|
||||
:return:
|
||||
"""
|
||||
path = Path(file_path)
|
||||
|
||||
# If the path doesn't exist, return it as is
|
||||
if not path.exists():
|
||||
return file_path
|
||||
|
||||
# Split the filename and extension
|
||||
stem = path.stem
|
||||
suffix = path.suffix
|
||||
directory = path.parent
|
||||
|
||||
# Try incrementing numbers until a unique path is found
|
||||
counter = 1
|
||||
while True:
|
||||
# Create new filename with counter
|
||||
new_filename = f"{stem}_{counter}{suffix}"
|
||||
new_path = os.path.join(directory, new_filename)
|
||||
|
||||
# Check if this new path exists
|
||||
if not os.path.exists(new_path):
|
||||
return new_path
|
||||
|
||||
# Increment counter for next attempt
|
||||
counter += 1
|
||||
|
||||
def get_document_path(self, file_hash):
|
||||
"""
|
||||
|
||||
:param file_hash:
|
||||
@@ -125,8 +182,13 @@ class DocumentService:
|
||||
"""
|
||||
return os.path.join(self.objects_folder, file_hash[:24], file_hash)
|
||||
|
||||
def exists(self, file_hash):
    """
    Tell whether an object with this hash is present on disk.

    :param file_hash: hash of the object, or None
    :return: False for None; otherwise whether the object's path exists
    """
    return file_hash is not None and os.path.exists(self.get_document_path(file_hash))
|
||||
|
||||
def save_content_if_needed(self, file_hash, content: bytes):
|
||||
target_path = self._get_document_path(file_hash)
|
||||
target_path = self.get_document_path(file_hash)
|
||||
if os.path.exists(target_path):
|
||||
return
|
||||
|
||||
@@ -136,6 +198,19 @@ class DocumentService:
|
||||
with open(target_path, "wb") as f:
|
||||
f.write(content)
|
||||
|
||||
def move_to_errors(self, document_id, file_path):
    """
    Move a file into the errors folder.

    The destination name is "<document_id>_<original basename>"; if that
    name is already taken a numbered variant is used instead.

    :param document_id: identifier used as prefix of the destination name
    :param file_path: path of the file to move
    """
    logger.info(f"Moving file {file_path} to error folder")
    # shutil.move does not create missing directories, so make sure the
    # destination folder is there before moving.
    os.makedirs(self.errors_folder, exist_ok=True)
    error_file_name = f"{document_id}_{os.path.basename(file_path)}"
    error_file_path = self._get_safe_path(os.path.join(self.errors_folder, error_file_name))
    shutil.move(file_path, error_file_path)
|
||||
|
||||
def move_to_ignored(self, file_path, reason="Unknown"):
    """
    Move a file into the ignored folder.

    The destination name is "<timestamp>_### <reason> ###_<original basename>";
    if that name is already taken a numbered variant is used instead.

    :param file_path: path of the file to move
    :param reason: short explanation embedded in the destination name
    """
    logger.info(f"Moving file {file_path} to ignored folder")
    # shutil.move does not create missing directories, so make sure the
    # destination folder is there before moving.
    os.makedirs(self.ignored_folder, exist_ok=True)

    # The reason becomes part of a file name: a path separator inside it
    # would point the destination at a sub-directory that does not exist.
    safe_reason = str(reason)
    for separator in filter(None, (os.sep, os.altsep)):
        safe_reason = safe_reason.replace(separator, "_")

    ignored_file_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + f"_### {safe_reason} ###_" + os.path.basename(
        file_path)
    ignored_file_path = self._get_safe_path(os.path.join(self.ignored_folder, ignored_file_name))
    shutil.move(file_path, ignored_file_path)
|
||||
|
||||
def create_document(
|
||||
self,
|
||||
file_path: str,
|
||||
@@ -171,7 +246,16 @@ class DocumentService:
|
||||
detected_at = datetime.now()
|
||||
|
||||
try:
|
||||
logger.info(f'Creating Document for "{file_path}"')
|
||||
# Skip the document if it already exists
|
||||
same_document = self.document_repository.find_same_document(filename, file_hash)
|
||||
if same_document is not None:
|
||||
logger.info(f" Document with same hash already exists. Skipping...")
|
||||
self.move_to_ignored(file_path, f"already exists ({same_document.id})")
|
||||
raise DocumentAlreadyExists(f"Document with same hash already exists ({same_document.id})")
|
||||
|
||||
self.save_content_if_needed(file_hash, file_bytes)
|
||||
logger.info(f" Saved content to {self.get_document_path(file_hash)}")
|
||||
|
||||
# Create FileDocument
|
||||
file_data = FileDocument(
|
||||
@@ -187,14 +271,90 @@ class DocumentService:
|
||||
mime_type=mime_type
|
||||
)
|
||||
|
||||
created_file = self.document_repository.create_document(file_data)
|
||||
created_document = self.document_repository.create_document(file_data)
|
||||
logger.info(f" Created document with id '{created_document.id}'")
|
||||
|
||||
return created_file
|
||||
return created_document
|
||||
|
||||
except DocumentAlreadyExists as e:
|
||||
raise e
|
||||
except Exception as e:
|
||||
# Transaction will automatically rollback if supported
|
||||
raise PyMongoError(f"Failed to create document: {str(e)}")
|
||||
|
||||
def create_pdf(self, document_id: PyObjectId):
    """
    Attach a PDF version to a document.

    If another document with the same content hash already references a PDF
    that is present in the objects folder, that PDF is reused. Otherwise the
    stored content is converted to PDF, saved under its own hash, and the
    document's pdf_file_hash is updated.

    :param document_id: id of the document to process
    :raises DocumentProcessingError: if the document or its content is missing
    :return: None
    """
    logger.info(f"Creating PDF document for {document_id}")
    document = self.get_document_by_id(document_id)
    if document is None:
        logger.error(f" Document not found")
        raise DocumentProcessingError(f"Document {document_id} not found.")

    # try to find another document that has the same hash
    document_with_same_hash = self.get_document_with_pdf_hash(document.file_hash)

    # the pdf will be created only if it does not exist yet
    if document_with_same_hash and self.exists(document_with_same_hash.pdf_file_hash):
        logger.info(f'Found document with same hash. Will use pdf "{document_with_same_hash.pdf_file_hash}".')
        self.update_document(document_id, {"pdf_file_hash": document_with_same_hash.pdf_file_hash})
        return

    # make sure the source content is available before converting
    logger.info(f" No document with same hash and valid pdf found. Will create new pdf content.")
    if self.get_document_content_by_hash(document.file_hash) is None:
        logger.error(f'Content for document "{document_id}" not found. hash = "{document.file_hash}".')
        raise DocumentProcessingError(f'Content for document "{document_id}" not found. hash = "{document.file_hash}".')

    # create the pdf file
    temp_pdf_file = convert_to_pdf(self.get_document_path(document.file_hash), self.temp_folder)
    try:
        # read the converted file once and reuse the bytes for hashing and saving
        pdf_bytes = self._read_file_bytes(temp_pdf_file)
        pdf_file_hash = self._calculate_file_hash(pdf_bytes)
        self.save_content_if_needed(pdf_file_hash, pdf_bytes)
    finally:
        # remove the temporary file even when hashing or saving fails
        os.remove(temp_pdf_file)
    logger.info(f' Created new pdf file with hash "{pdf_file_hash}"')

    # update the document
    self.update_document(document_id, {"pdf_file_hash": pdf_file_hash})
|
||||
|
||||
def create_thumbnail(self, document_id: PyObjectId):
    """
    Attach a thumbnail to a document.

    If another document with the same content hash already references a
    thumbnail that is present in the objects folder, that thumbnail is
    reused. Otherwise a PNG is rendered from page 0 of the document's PDF
    (width 200), saved under its own hash, and the document's
    thumbnail_file_hash is updated.

    :param document_id: id of the document to process
    :raises DocumentProcessingError: if the document or its PDF is missing
    :return: None
    """
    logger.info(f'Creating thumbnail document for "{document_id}"')
    document = self.get_document_by_id(document_id)
    if document is None:
        logger.error(f" Document not found !")
        raise DocumentProcessingError(f"Document {document_id} not found.")

    # try to find another document that has the same hash
    document_with_same_hash = self.get_document_with_pdf_hash(document.file_hash)

    # We will use the thumbnail of the pdf if it exists
    if document_with_same_hash and self.exists(document_with_same_hash.thumbnail_file_hash):
        logger.info(f" Found document with same hash. Will use thumbnail {document_with_same_hash.thumbnail_file_hash}")
        self.update_document(document_id, {"thumbnail_file_hash": document_with_same_hash.thumbnail_file_hash})
        return

    logger.info(f" No document with same hash and valid thumbnail found. Will create new thumbnail")

    if not self.exists(document.pdf_file_hash):
        logger.error(f" PDF file not found.")
        raise DocumentProcessingError(f"PDF file for document {document_id} not found")

    tmp_thumbnail_path = os.path.join(self.temp_folder, f"{generate_uuid_filename()}.png")
    try:
        with PDFThumbnailGenerator(self.get_document_path(document.pdf_file_hash)) as gen:
            # create the thumbnail
            gen.create_thumbnail(tmp_thumbnail_path, page_num=0, width=200)
            # read the rendered file once and reuse the bytes below
            thumbnail_bytes = self._read_file_bytes(tmp_thumbnail_path)
        thumbnail_file_hash = self._calculate_file_hash(thumbnail_bytes)

        # save the thumbnail to the objects folder
        self.save_content_if_needed(thumbnail_file_hash, thumbnail_bytes)
    finally:
        # remove the temporary file even on failure; it may not exist if
        # rendering itself failed
        if os.path.exists(tmp_thumbnail_path):
            os.remove(tmp_thumbnail_path)

    # update the document
    self.update_document(document_id, {"thumbnail_file_hash": thumbnail_file_hash})
    logger.info(f" Created thumbnail {thumbnail_file_hash}")
|
||||
|
||||
def get_document_by_id(self, document_id: PyObjectId) -> Optional[FileDocument]:
|
||||
"""
|
||||
Retrieve a document by its ID.
|
||||
@@ -219,6 +379,9 @@ class DocumentService:
|
||||
"""
|
||||
return self.document_repository.find_document_by_hash(file_hash)
|
||||
|
||||
def get_document_with_pdf_hash(self, file_hash) -> Optional[FileDocument]:
    """
    Look up a document through the repository's PDF-hash query.

    Args:
        file_hash: Hash value forwarded to
            ``document_repository.find_document_with_pdf_hash``.

    Returns:
        Whatever the repository returns for that hash.
    """
    repository = self.document_repository
    return repository.find_document_with_pdf_hash(file_hash)
|
||||
|
||||
def get_document_by_filepath(self, filepath: str) -> Optional[FileDocument]:
|
||||
"""
|
||||
Retrieve a document by its file path.
|
||||
@@ -232,7 +395,7 @@ class DocumentService:
|
||||
return self.document_repository.find_document_by_filepath(filepath)
|
||||
|
||||
def get_document_content_by_hash(self, file_hash):
|
||||
target_path = self._get_document_path(file_hash)
|
||||
target_path = self.get_document_path(file_hash)
|
||||
if not os.path.exists(target_path):
|
||||
return None
|
||||
|
||||
@@ -323,7 +486,7 @@ class DocumentService:
|
||||
# If no other files reference this content, delete it
|
||||
if not remaining_files:
|
||||
try:
|
||||
os.remove(self._get_document_path(document.file_hash))
|
||||
os.remove(self.get_document_path(document.file_hash))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
@@ -111,7 +111,9 @@ class JobService:
|
||||
current_job = self.repository.find_job_by_id(job_id)
|
||||
|
||||
# Validate status transition
|
||||
if current_job.status != ProcessingStatus.PROCESSING:
|
||||
if current_job.status in (ProcessingStatus.PENDING,
|
||||
ProcessingStatus.COMPLETED,
|
||||
ProcessingStatus.FAILED):
|
||||
raise InvalidStatusTransitionError(current_job.status, ProcessingStatus.COMPLETED)
|
||||
|
||||
# Update status
|
||||
@@ -141,7 +143,7 @@ class JobService:
|
||||
current_job = self.repository.find_job_by_id(job_id)
|
||||
|
||||
# Validate status transition
|
||||
if current_job.status != ProcessingStatus.PROCESSING:
|
||||
if current_job.status in (ProcessingStatus.PENDING, ProcessingStatus.COMPLETED, ProcessingStatus.FAILED):
|
||||
raise InvalidStatusTransitionError(current_job.status, ProcessingStatus.FAILED)
|
||||
|
||||
# Update status with error message
|
||||
@@ -151,6 +153,11 @@ class JobService:
|
||||
error_message
|
||||
)
|
||||
|
||||
def update_job_status(self, job_id: PyObjectId,
                      status: ProcessingStatus,
                      error_message: str = None) -> ProcessingJob:
    """
    Set the status of a job by delegating directly to the repository.

    No status-transition check is performed in this method.

    Args:
        job_id: Identifier of the job to update.
        status: Status to store.
        error_message: Optional error text forwarded to the repository.

    Returns:
        The value returned by ``repository.update_job_status``.
    """
    updated_job = self.repository.update_job_status(job_id, status, error_message)
    return updated_job
|
||||
|
||||
def delete_job(self, job_id: PyObjectId) -> bool:
|
||||
"""
|
||||
Delete a job from the database.
|
||||
|
||||
241
src/file-processor/app/utils/pdf_annotation.py
Normal file
241
src/file-processor/app/utils/pdf_annotation.py
Normal file
@@ -0,0 +1,241 @@
|
||||
import fitz # PyMuPDF
|
||||
|
||||
|
||||
class PDFAnnotator:
    """
    Thin convenience wrapper around a PyMuPDF document for adding, listing,
    removing and applying annotations.

    The instance owns the opened document; use it as a context manager (or
    call ``close()``) to release it.
    """

    def __init__(self, pdf_path):
        """
        Open the PDF to annotate.

        Args:
            pdf_path: Path of the PDF file, passed to ``fitz.open``.
        """
        self.doc = fitz.open(pdf_path)

    def add_highlight(self, rect, page_num=0, color=(1, 1, 0)):
        """
        Add highlight annotation

        Args:
            rect: (x0, y0, x1, y1) coordinates or fitz.Rect object
            page_num: Page number (0-indexed), default first page
            color: RGB tuple (0-1 range), default yellow

        Returns:
            The created annotation.
        """
        page = self.doc[page_num]
        annot = page.add_highlight_annot(rect)
        annot.set_colors(stroke=color)
        annot.update()
        return annot

    def add_rectangle(self, rect, page_num=0, color=(1, 0, 0), width=2):
        """
        Add rectangle annotation (border only)

        Args:
            rect: (x0, y0, x1, y1) coordinates or fitz.Rect object
            page_num: Page number (0-indexed), default first page
            color: RGB tuple (0-1 range), default red
            width: Line width in points

        Returns:
            The created annotation.
        """
        page = self.doc[page_num]
        annot = page.add_rect_annot(rect)
        annot.set_colors(stroke=color)
        annot.set_border(width=width)
        annot.update()
        return annot

    def add_text_note(self, point, text, page_num=0, icon="Note"):
        """
        Add sticky note annotation

        Args:
            point: (x, y) position tuple
            text: Note content string
            page_num: Page number (0-indexed), default first page
            icon: "Note", "Comment", "Help", "Insert", "Key", etc.

        Returns:
            The created annotation.
        """
        page = self.doc[page_num]
        annot = page.add_text_annot(point, text, icon=icon)
        annot.update()
        return annot

    def add_free_text(self, rect, text, page_num=0, fontsize=12,
                      color=(0, 0, 0)):
        """
        Add free text annotation (visible text box)

        Args:
            rect: (x0, y0, x1, y1) bounding box tuple or fitz.Rect
            text: Text content string
            page_num: Page number (0-indexed), default first page
            fontsize: Font size in points
            color: Text color RGB tuple (0-1 range)

        Returns:
            The created annotation.
        """
        page = self.doc[page_num]
        annot = page.add_freetext_annot(
            rect,
            text,
            fontsize=fontsize,
            text_color=color
        )
        annot.update()
        return annot

    def add_arrow(self, start_point, end_point, page_num=0,
                  color=(1, 0, 0), width=2):
        """
        Add arrow annotation

        Args:
            start_point: (x, y) tuple for arrow start
            end_point: (x, y) tuple for arrow end
            page_num: Page number (0-indexed), default first page
            color: Arrow color RGB tuple (0-1 range), default red
            width: Line width in points

        Returns:
            The created annotation.
        """
        page = self.doc[page_num]
        annot = page.add_line_annot(start_point, end_point)
        annot.set_colors(stroke=color)
        annot.set_border(width=width)
        # Plain start, closed arrow head at the end. Named constants are used
        # because the raw value 1 is the *square* line ending, not an arrow.
        annot.set_line_ends(fitz.PDF_ANNOT_LE_NONE, fitz.PDF_ANNOT_LE_CLOSED_ARROW)
        annot.update()
        return annot

    def add_stamp(self, rect, page_num=0, stamp_type=0):
        """
        Add stamp annotation

        Args:
            rect: (x0, y0, x1, y1) bounding box tuple or fitz.Rect
            page_num: Page number (0-indexed), default first page
            stamp_type: Integer for stamp type (PyMuPDF ``STAMP_*`` values):
                0=Approved, 1=AsIs, 2=Confidential,
                3=Departmental, 4=Experimental, 5=Expired,
                6=Final, 7=ForComment, 8=ForPublicRelease,
                9=NotApproved, 10=NotForPublicRelease, 11=Sold,
                12=TopSecret, 13=Draft

        Returns:
            The created annotation.
        """
        page = self.doc[page_num]
        annot = page.add_stamp_annot(rect, stamp=stamp_type)
        annot.update()
        return annot

    def add_redaction(self, rect, page_num=0, fill_color=(0, 0, 0)):
        """
        Add redaction annotation (marks area for redaction)
        Note: Use apply_redactions() to permanently remove content

        Args:
            rect: (x0, y0, x1, y1) area to redact, tuple or fitz.Rect
            page_num: Page number (0-indexed), default first page
            fill_color: RGB tuple (0-1 range) for redacted area, default black

        Returns:
            The created annotation.
        """
        page = self.doc[page_num]
        annot = page.add_redact_annot(rect, fill=fill_color)
        annot.update()
        return annot

    def apply_redactions(self, page_num=0, images=2, graphics=2, text=2):
        """
        Apply all redaction annotations on a page (permanent removal)

        The three option values are forwarded unchanged to
        ``Page.apply_redactions``; their meaning is defined by PyMuPDF's
        ``PDF_REDACT_*`` constants.

        Args:
            page_num: Page number (0-indexed), default first page
            images: PyMuPDF image option (0=ignore, 1=remove image,
                2=blank out overlapping pixels)
            graphics: PyMuPDF vector graphics option (0=ignore,
                1=remove if fully covered, 2=remove if touched)
            text: PyMuPDF text option (0=remove overlapping text, 1=keep text).
                NOTE(review): the default of 2 is kept for compatibility but
                is not one of those two documented values - confirm against
                the installed PyMuPDF version.

        Returns:
            True if redactions were applied, False otherwise
        """
        page = self.doc[page_num]
        # Only call into PyMuPDF when the page actually carries redaction marks.
        has_redactions = any(
            annot.type[0] == fitz.PDF_ANNOT_REDACT for annot in page.annots()
        )
        if not has_redactions:
            return False

        page.apply_redactions(images=images, graphics=graphics, text=text)
        return True

    def get_all_annotations(self, page_num=0):
        """
        Retrieve all annotations from a page

        Args:
            page_num: Page number (0-indexed), default first page

        Returns:
            List of dicts with the keys 'type', 'rect', 'content', 'author',
            'created' and 'colors', one dict per annotation
        """
        page = self.doc[page_num]
        return [
            {
                'type': annot.type[1],  # Annotation type name
                'rect': annot.rect,
                'content': annot.info.get('content', ''),
                'author': annot.info.get('title', ''),
                'created': annot.info.get('creationDate', ''),
                'colors': annot.colors
            }
            for annot in page.annots()
        ]

    def remove_all_annotations(self, page_num=0):
        """
        Remove all annotations from a page

        Args:
            page_num: Page number (0-indexed), default first page
        """
        page = self.doc[page_num]
        # Do not delete while iterating page.annots(): walk the chain through
        # delete_annot(), which hands back the annotation following the
        # deleted one.
        annot = page.first_annot
        while annot:
            annot = page.delete_annot(annot)

    def save(self, output_path):
        """Save the annotated PDF"""
        self.doc.save(output_path)

    def close(self):
        """Close the underlying document."""
        self.doc.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
|
||||
|
||||
|
||||
# Example usage: exercises every annotation helper on the first page of
# "input.pdf" and writes the result to "output_annotated.pdf".
if __name__ == "__main__":
    with PDFAnnotator("input.pdf") as annotator:
        # Yellow highlight
        annotator.add_highlight((100, 100, 300, 120), page_num=0, color=(1, 1, 0))

        # Red rectangle outline
        annotator.add_rectangle((100, 150, 300, 250), page_num=0, color=(1, 0, 0), width=3)

        # Sticky note
        annotator.add_text_note((400, 100), "This is important!", page_num=0, icon="Comment")

        # Visible text box
        annotator.add_free_text(
            (100, 300, 400, 350),
            "DRAFT VERSION",
            page_num=0,
            fontsize=20,
            color=(1, 0, 0),
        )

        # Arrow pointing at something
        annotator.add_arrow((450, 100), (500, 200), page_num=0, color=(0, 0, 1), width=2)

        # "Approved" stamp
        annotator.add_stamp((450, 300, 550, 350), page_num=0, stamp_type=0)

        # Redaction box over sensitive info, applied immediately
        annotator.add_redaction((100, 400, 300, 420), page_num=0)
        annotator.apply_redactions(page_num=0)

        # Report what is on the page now
        found = annotator.get_all_annotations(page_num=0)
        print(f"Found {len(found)} annotations:")
        for entry in found:
            print(f" - {entry['type']} at {entry['rect']}")

        # Persist the annotated PDF
        annotator.save("output_annotated.pdf")
|
||||
210
src/file-processor/app/utils/pdf_converter.py
Normal file
210
src/file-processor/app/utils/pdf_converter.py
Normal file
@@ -0,0 +1,210 @@
|
||||
import datetime
|
||||
import hashlib
|
||||
import os
|
||||
import uuid
|
||||
from abc import ABC
|
||||
from pathlib import Path
|
||||
from typing import Self
|
||||
|
||||
import pikepdf
|
||||
import pypandoc
|
||||
from PIL import Image
|
||||
from reportlab.lib.pagesizes import A4
|
||||
from reportlab.pdfgen import canvas
|
||||
|
||||
from tasks.common.converter_utils import detect_file_type
|
||||
|
||||
|
||||
class BaseConverter(ABC):
|
||||
"""Abstract base class for file converters to PDF."""
|
||||
|
||||
def __init__(self, input_path: str, output_dir: str = ".") -> None:
|
||||
self.input_path = Path(input_path)
|
||||
self.output_dir = Path(output_dir)
|
||||
self.output_path = self.output_dir / f"{self.generate_uuid_filename()}.pdf"
|
||||
|
||||
def convert(self) -> Self:
|
||||
"""Convert input file to PDF and return the output path."""
|
||||
pass
|
||||
|
||||
@staticmethod
|
||||
def generate_uuid_filename() -> str:
|
||||
"""Generate a unique filename using UUID4."""
|
||||
return str(uuid.uuid4())
|
||||
|
||||
def get_deterministic_date(self) -> str:
|
||||
"""
|
||||
Generate a deterministic date based on file content.
|
||||
This ensures the same file always produces the same PDF.
|
||||
"""
|
||||
# Option 1: Use a fixed date
|
||||
# return "D:20000101000000"
|
||||
|
||||
# Option 2: Generate date from content hash (recommended)
|
||||
with open(self.input_path, 'rb') as f:
|
||||
content = f.read()
|
||||
content_hash = hashlib.sha256(content).hexdigest()
|
||||
|
||||
# Use first 14 characters of hash to create a valid date
|
||||
# Format: D:YYYYMMDDHHmmss
|
||||
hash_int = int(content_hash[:14], 16)
|
||||
|
||||
# Create a date between 2000-2099 to keep it reasonable
|
||||
year = 2000 + (hash_int % 100)
|
||||
month = 1 + (hash_int % 12)
|
||||
day = 1 + (hash_int % 28) # Stay safe with 28 days
|
||||
hour = hash_int % 24
|
||||
minute = hash_int % 60
|
||||
second = hash_int % 60
|
||||
|
||||
return f"D:{year:04d}{month:02d}{day:02d}{hour:02d}{minute:02d}{second:02d}"
|
||||
|
||||
def get_file_creation_date(self):
|
||||
# Get file creation time (or modification time)
|
||||
ts = os.path.getctime(self.input_path) # getmtime(self.input_path) for last modification
|
||||
dt = datetime.datetime.fromtimestamp(ts)
|
||||
|
||||
# PDF expects format D:YYYYMMDDHHmmss
|
||||
creation_date = dt.strftime("D:%Y%m%d%H%M%S")
|
||||
return creation_date
|
||||
|
||||
def clean_pdf(self) -> Self:
|
||||
"""Remove all non-deterministic metadata from PDF."""
|
||||
with pikepdf.open(self.output_path, allow_overwriting_input=True) as pdf:
|
||||
# Remove XMP metadata if it exists
|
||||
if hasattr(pdf.Root, 'Metadata'):
|
||||
del pdf.Root.Metadata
|
||||
|
||||
# Clear all document info by deleting each key
|
||||
for key in list(pdf.docinfo.keys()):
|
||||
del pdf.docinfo[key]
|
||||
|
||||
# Set deterministic metadata
|
||||
pdf.docinfo["/Producer"] = "MyConverter"
|
||||
pdf.docinfo["/Creator"] = "MyConverter"
|
||||
pdf.docinfo["/CreationDate"] = self.get_deterministic_date()
|
||||
pdf.docinfo["/ModDate"] = self.get_deterministic_date()
|
||||
pdf.docinfo["/Title"] = self.input_path.name
|
||||
|
||||
# Save with deterministic IDs
|
||||
# compress=True ensures consistent compression
|
||||
# deterministic_id=True (if available) or static_id=True
|
||||
pdf.save(
|
||||
self.output_path,
|
||||
fix_metadata_version=True,
|
||||
compress_streams=True,
|
||||
stream_decode_level=pikepdf.StreamDecodeLevel.generalized,
|
||||
object_stream_mode=pikepdf.ObjectStreamMode.disable,
|
||||
deterministic_id=True # Use this if pikepdf >= 8.0.0, otherwise use static_id=True
|
||||
)
|
||||
|
||||
return self
|
||||
|
||||
|
||||
class TextToPdfConverter(BaseConverter):
    """Converter for text files to PDF."""

    def convert(self) -> Self:
        """
        Render the UTF-8 text file line by line onto A4 pages.

        Each line is drawn 15 points below the previous one, starting 50
        points from the top; a new page is started once the bottom margin
        (50 points) is reached.

        Returns:
            The converter itself, so calls can be chained.
        """
        c = canvas.Canvas(str(self.output_path), pagesize=A4)

        # Seed the document info; clean_pdf() later replaces it with
        # deterministic values.
        info = c._doc.info
        info.producer = "MyConverter"
        info.creationDate = self.get_file_creation_date()
        info.title = os.path.basename(self.input_path)

        width, height = A4
        with open(self.input_path, "r", encoding="utf-8") as f:
            y = height - 50
            for line in f:
                # Drop only the line terminator / trailing blanks so that
                # leading indentation of the source text is preserved.
                c.drawString(50, y, line.rstrip())
                y -= 15
                if y < 50:
                    c.showPage()
                    y = height - 50

        c.save()
        return self
|
||||
|
||||
|
||||
class PdfToPdfConverter(BaseConverter):
    """Converter for PDF files to PDF."""

    def convert(self) -> Self:
        """
        Copy the input PDF to ``self.output_path``.

        The copy is done in-process instead of shelling out to ``cp``, so
        paths containing spaces or shell metacharacters are handled safely
        and a failed copy raises instead of being ignored.

        Returns:
            The converter itself, so calls can be chained.

        Raises:
            OSError: If the file cannot be read or written.
        """
        import shutil

        shutil.copyfile(self.input_path, self.output_path)
        return self
|
||||
|
||||
|
||||
class ImageToPdfConverter(BaseConverter):
    """Converter for image files to PDF."""

    def convert(self) -> Self:
        """
        Convert the image to RGB and save it at ``self.output_path``.

        Returns:
            The converter itself, so calls can be chained.
        """
        # The context manager closes the source image file once the PDF has
        # been written.
        with Image.open(self.input_path) as image:
            rgb_image = image.convert("RGB")
            rgb_image.save(self.output_path)
        return self
|
||||
|
||||
|
||||
class WordToPdfConverter(BaseConverter):
    """Converter for Word files (.docx) to PDF using pypandoc."""

    def convert(self) -> Self:
        """
        Run pypandoc on the input file, writing a PDF to ``self.output_path``.

        Returns:
            The converter itself, so calls can be chained.
        """
        source = str(self.input_path)
        target = str(self.output_path)
        pypandoc.convert_file(source, "pdf", outputfile=target)
        return self
|
||||
|
||||
|
||||
# Placeholders for future extensions
|
||||
class HtmlToPdfConverter(BaseConverter):
    """Placeholder for HTML to PDF converter."""

    def convert(self) -> Self:
        """Not available yet; always raises ``NotImplementedError``."""
        message = "HTML to PDF conversion not implemented."
        raise NotImplementedError(message)
|
||||
|
||||
|
||||
class ExcelToPdfConverter(BaseConverter):
    """Placeholder for Excel to PDF converter."""

    def convert(self) -> Self:
        """Not available yet; always raises ``NotImplementedError``."""
        message = "Excel to PDF conversion not implemented."
        raise NotImplementedError(message)
|
||||
|
||||
|
||||
class MarkdownToPdfConverter(BaseConverter):
    """Placeholder for Markdown to PDF converter."""

    def convert(self) -> Self:
        """Not available yet; always raises ``NotImplementedError``."""
        message = "Markdown to PDF conversion not implemented."
        raise NotImplementedError(message)
|
||||
|
||||
|
||||
def convert_to_pdf(filepath: str, output_dir: str = ".") -> str:
|
||||
"""
|
||||
Convert any supported file to PDF.
|
||||
|
||||
Args:
|
||||
filepath (str): Path to the input file.
|
||||
output_dir (str): Directory to save the output PDF.
|
||||
|
||||
Returns:
|
||||
str: Path to the generated PDF.
|
||||
|
||||
Raises:
|
||||
UnsupportedFileTypeError: If the input file type is not supported.
|
||||
"""
|
||||
file_type = detect_file_type(filepath)
|
||||
|
||||
if file_type == "text":
|
||||
converter = TextToPdfConverter(filepath, output_dir=output_dir)
|
||||
elif file_type == "image":
|
||||
converter = ImageToPdfConverter(filepath, output_dir=output_dir)
|
||||
elif file_type == "word":
|
||||
converter = WordToPdfConverter(filepath, output_dir=output_dir)
|
||||
elif file_type == "pdf":
|
||||
converter = PdfToPdfConverter(filepath, output_dir=output_dir)
|
||||
else:
|
||||
raise ValueError(f"Unsupported file type: {file_type}")
|
||||
|
||||
converter.convert()
|
||||
converter.clean_pdf()
|
||||
return str(converter.output_path)
|
||||
167
src/file-processor/app/utils/pdf_thumbmail.py
Normal file
167
src/file-processor/app/utils/pdf_thumbmail.py
Normal file
@@ -0,0 +1,167 @@
|
||||
from pathlib import Path
|
||||
|
||||
import fitz # PyMuPDF
|
||||
|
||||
|
||||
class PDFThumbnailGenerator:
    """
    Render pages of a PDF (opened with PyMuPDF) to thumbnail images.

    The instance owns the opened document; use it as a context manager (or
    call ``close()``) to release it.
    """

    def __init__(self, pdf_path):
        """
        Initialize PDF thumbnail generator

        Args:
            pdf_path: Path to the PDF file (string or Path object)
        """
        self.pdf_path = pdf_path
        self.doc = fitz.open(pdf_path)

    def create_thumbnail(self, output_path, page_num=0, width=200, rotation=0, zoom_factor=1.0):
        """
        Create a thumbnail with zoom and rotation

        Args:
            output_path: Path to save the thumbnail (string or Path)
            page_num: Page number (0-indexed), default first page
            width: Desired width in pixels, default 200
            rotation: Extra rotation in degrees (0, 90, 180, 270) applied on
                top of the page's own rotation, default 0
            zoom_factor: Additional zoom multiplier (1.0 = normal, 2.0 = 2x), default 1.0

        Returns:
            Dict with thumbnail info (width, height, rotation, zoom)
        """
        page = self.doc[page_num]

        # Rotate relative to the page's stored rotation. Setting an absolute
        # value would make the default (0) undo the rotation of pages that
        # are stored rotated, and would leave the page modified afterwards.
        original_rotation = page.rotation
        if rotation:
            page.set_rotation((original_rotation + rotation) % 360)

        try:
            # Calculate zoom to achieve desired width (page.rect reflects the
            # rotation currently set on the page)
            base_zoom = width / page.rect.width
            final_zoom = base_zoom * zoom_factor

            # Create transformation matrix
            mat = fitz.Matrix(final_zoom, final_zoom)

            # Render page to pixmap
            pix = page.get_pixmap(matrix=mat, alpha=False)

            # Save thumbnail
            pix.save(output_path)
        finally:
            # Leave the page as it was found for subsequent renders.
            if rotation:
                page.set_rotation(original_rotation)

        return {
            'width': pix.width,
            'height': pix.height,
            'rotation': rotation,
            'zoom': zoom_factor
        }

    def create_cropped_thumbnail(self, output_path, crop_rect=None, page_num=0, width=200):
        """
        Create a thumbnail of a specific region (zoom on area)

        Args:
            output_path: Path to save the thumbnail (string or Path)
            crop_rect: Tuple (x0, y0, x1, y1) in PDF coordinates for cropping,
                or None for full page, default None
            page_num: Page number (0-indexed), default first page
            width: Desired width in pixels, default 200

        Returns:
            Tuple (width, height) of the generated thumbnail
        """
        page = self.doc[page_num]

        if crop_rect:
            # Create rectangle for cropping
            rect = fitz.Rect(crop_rect)
            zoom = width / rect.width
        else:
            rect = page.rect
            zoom = width / page.rect.width

        mat = fitz.Matrix(zoom, zoom)

        # Render only the specified rectangle
        pix = page.get_pixmap(matrix=mat, clip=rect)
        pix.save(output_path)

        return pix.width, pix.height

    def get_page_info(self, page_num=0):
        """
        Get information about a specific page

        Args:
            page_num: Page number (0-indexed), default first page

        Returns:
            Dict with page information (width, height, rotation, number, total_pages);
            'number' is 1-based
        """
        page = self.doc[page_num]
        return {
            'width': page.rect.width,
            'height': page.rect.height,
            'rotation': page.rotation,
            'number': page_num + 1,
            'total_pages': len(self.doc)
        }

    def create_multi_resolution_thumbnails(self, output_folder, page_num=0, sizes=(150, 300, 600)):
        """
        Create multiple thumbnails at different resolutions

        Each thumbnail is written to ``thumb_<size>px.png`` inside
        ``output_folder``, which is created if missing.

        Args:
            output_folder: Folder path to save thumbnails (string or Path)
            page_num: Page number (0-indexed), default first page
            sizes: Iterable of widths in pixels, default (150, 300, 600)

        Returns:
            Dict mapping each size to thumbnail info
        """
        output_folder = Path(output_folder)
        output_folder.mkdir(exist_ok=True, parents=True)

        return {
            size: self.create_thumbnail(
                output_folder / f"thumb_{size}px.png", page_num=page_num, width=size
            )
            for size in sizes
        }

    def close(self):
        """Close the PDF document and free resources"""
        self.doc.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
|
||||
|
||||
|
||||
# Example usage: renders several thumbnail variants of the first page of
# "example.pdf" and prints that page's information.
if __name__ == "__main__":
    # (output file, extra keyword arguments) for the plain create_thumbnail calls:
    # standard, rotated by 90 degrees, and zoomed 2x.
    simple_variants = (
        ("thumb_standard.png", {}),
        ("thumb_rotated.png", {"rotation": 90}),
        ("thumb_zoomed.png", {"zoom_factor": 2.0}),
    )

    with PDFThumbnailGenerator("example.pdf") as gen:
        for target, extra in simple_variants:
            gen.create_thumbnail(target, page_num=0, width=200, **extra)

        # Cropped/zoomed on specific area (x0, y0, x1, y1)
        gen.create_cropped_thumbnail(
            "thumb_crop.png",
            crop_rect=(100, 100, 400, 400),
            page_num=0,
            width=300,
        )

        # Multiple resolutions
        gen.create_multi_resolution_thumbnails("thumbnails/", page_num=0, sizes=[150, 300, 600])

        # Page information
        info = gen.get_page_info(page_num=0)
        print(f"Page info: {info}")
|
||||
@@ -4,9 +4,10 @@ Password security utilities using bcrypt for secure password hashing.
|
||||
This module provides secure password hashing and verification functions
|
||||
using the bcrypt algorithm with automatic salt generation.
|
||||
"""
|
||||
import re
|
||||
import uuid
|
||||
|
||||
import bcrypt
|
||||
from typing import Union
|
||||
|
||||
|
||||
def hash_password(password: str) -> str:
|
||||
@@ -71,4 +72,33 @@ def verify_password(password: str, hashed_password: str) -> bool:
|
||||
# bcrypt raises ValueError for malformed hashes
|
||||
raise RuntimeError(f"Invalid hash format: {str(e)}")
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"Failed to verify password: {str(e)}")
|
||||
raise RuntimeError(f"Failed to verify password: {str(e)}")
|
||||
|
||||
|
||||
def generate_uuid_filename() -> str:
    """Return a newly generated random UUID (version 4) as a string, for use as a unique file name."""
    unique_id = uuid.uuid4()
    return str(unique_id)
|
||||
|
||||
|
||||
def safe_connection_string(connection_string: str) -> str:
    """
    Mask the password in a MongoDB connection string.

    Both the ``mongodb://`` and ``mongodb+srv://`` schemes are handled. A
    string that contains no ``user:password@`` part is returned unchanged.

    Args:
        connection_string (str): The complete MongoDB connection string

    Returns:
        str: The connection string with password replaced by asterisks

    Example:
        >>> safe_connection_string("mongodb://admin:password123@mongodb:27017/mydocmanager?authSource=admin")
        'mongodb://admin:*****@mongodb:27017/mydocmanager?authSource=admin'
    """
    # Groups: 1 = scheme + username + ':', 2 = password, 3 = '@' and the rest.
    # Format: mongodb[+srv]://username:password@host:port/database
    pattern = r'(mongodb(?:\+srv)?://[^:]+:)([^@]+)(@.*)'

    # Replace password with asterisks
    return re.sub(pattern, r'\1*****\3', connection_string)
|
||||
|
||||
@@ -5,10 +5,12 @@ email-validator==2.3.0
|
||||
fastapi==0.116.1
|
||||
httptools==0.6.4
|
||||
motor==3.7.1
|
||||
pikepdf==9.11.0
|
||||
pillow==11.3.0
|
||||
pydantic==2.11.9
|
||||
PyJWT==2.10.1
|
||||
pymongo==4.15.0
|
||||
PyMuPDF==1.26.4
|
||||
pypandoc==1.15
|
||||
python-multipart==0.0.20
|
||||
redis==6.4.0
|
||||
|
||||
@@ -1,12 +1,93 @@
|
||||
# React + Vite
|
||||
|
||||
This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules.
|
||||
# MyDocManager Frontend
|
||||
|
||||
Currently, two official plugins are available:
|
||||
## Overview
|
||||
MyDocManager Frontend is a modern web application built with React and Vite that serves as the user interface for the MyDocManager document management system. The application provides a seamless experience for users to manage, process, and organize their documents with an intuitive and responsive interface.
|
||||
|
||||
- [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react) uses [Babel](https://babeljs.io/) for Fast Refresh
|
||||
- [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh
|
||||
## Project Structure
|
||||
frontend/
|
||||
├── public/ # Public assets and static files
|
||||
├── src/ # Source code
|
||||
│ ├── assets/ # Icons, images, and other static assets
|
||||
│ ├── components/ # Reusable UI components
|
||||
│ │ ├── auth/ # Authentication-related components
|
||||
│ │ └── common/ # Shared components (Header, Layout, etc.)
|
||||
│ ├── contexts/ # React contexts for state management
|
||||
│ ├── hooks/ # Custom React hooks
|
||||
│ ├── pages/ # Page components representing full views
|
||||
│ ├── services/ # API service interfaces
|
||||
│ └── utils/ # Utility functions and helpers
|
||||
├── Dockerfile # Container configuration for deployment
|
||||
├── package.json # Dependencies and scripts
|
||||
├── tailwind.config.js # Tailwind CSS configuration
|
||||
└── vite.config.js # Vite bundler configuration
|
||||
|
||||
## Expanding the ESLint configuration
|
||||
|
||||
If you are developing a production application, we recommend using TypeScript with type-aware lint rules enabled. Check out the [TS template](https://github.com/vitejs/vite/tree/main/packages/create-vite/template-react-ts) for information on how to integrate TypeScript and [`typescript-eslint`](https://typescript-eslint.io) in your project.
|
||||
|
||||
## Key Components
|
||||
|
||||
### Authentication
|
||||
- **AuthContext**: Provides authentication state and methods throughout the application
|
||||
- **AuthLayout**: Layout wrapper specifically for authentication screens
|
||||
- **LoginForm**: Form component for user authentication
|
||||
- **ProtectedRoute**: Route guard that ensures authenticated access to protected pages
|
||||
|
||||
### UI Components
|
||||
- **Layout**: Main application layout structure with menu and content areas
|
||||
- **Header**: Application header with navigation and user controls
|
||||
- **Menu**: Side navigation menu with application links
|
||||
- **ThemeSwitcher**: Toggle for switching between light and dark themes
|
||||
|
||||
### Pages
|
||||
- **LoginPage**: User authentication page
|
||||
- **DashboardPage**: Main dashboard view for authenticated users
|
||||
|
||||
### Services
|
||||
- **authService**: Handles API communication for authentication operations
|
||||
- **api**: Base API utility for making HTTP requests to the backend
|
||||
|
||||
## Getting Started
|
||||
|
||||
### Prerequisites
|
||||
- Node.js (latest LTS version)
|
||||
- npm or yarn package manager
|
||||
|
||||
### Installation
|
||||
1. Clone the repository
|
||||
2. Navigate to the frontend directory
|
||||
3. Install dependencies:
|
||||
```
|
||||
npm install
|
||||
```
|
||||
|
||||
### Development
|
||||
Run the development server:
|
||||
```
|
||||
npm run dev
|
||||
```
|
||||
This will start the application in development mode at http://localhost:5173
|
||||
|
||||
### Building for Production
|
||||
Create a production build:
|
||||
```
|
||||
npm run build
|
||||
```
|
||||
|
||||
|
||||
## Technologies
|
||||
- React 19.1.1
|
||||
- Vite 7.1.2
|
||||
- Tailwind CSS 4.1.13
|
||||
- DaisyUI 5.1.24
|
||||
- React Router 7.9.3
|
||||
- Axios for API requests
|
||||
|
||||
## Features
|
||||
- Responsive design with Tailwind CSS
|
||||
- Authentication and authorization
|
||||
- Light/dark theme support
|
||||
- Document management interface
|
||||
- Secure API communication
|
||||
|
||||
## Project Integration
|
||||
This frontend application works in conjunction with the backend services and workers defined in other parts of the MyDocManager project to provide a complete document management solution.
|
||||
@@ -4,6 +4,7 @@ import ProtectedRoute from './components/common/ProtectedRoute';
|
||||
import Layout from './components/common/Layout';
|
||||
import LoginPage from './pages/LoginPage';
|
||||
import DashboardPage from './pages/DashboardPage';
|
||||
import DocumentsPage from './pages/DocumentsPage';
|
||||
|
||||
function App() {
|
||||
return (
|
||||
@@ -16,7 +17,8 @@ function App() {
|
||||
|
||||
{/* Protected Routes */}
|
||||
<Route path="/" element={<ProtectedRoute><Layout /></ProtectedRoute>}>
|
||||
<Route index element={<Navigate to="/dashboard" replace />} />
|
||||
<Route index element={<Navigate to="/documents" replace />} />
|
||||
<Route path="documents" element={<DocumentsPage />} />
|
||||
<Route path="dashboard" element={<DashboardPage />} />
|
||||
<Route path="documents" element={<div>Documents Page - Coming Soon</div>} />
|
||||
<Route path="users" element={<div>User Management - Coming Soon</div>} />
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
import {FaBuffer, FaPlus} from "react-icons/fa6";
|
||||
import { Link } from "react-router-dom";
|
||||
|
||||
const Menu = () => {
|
||||
return (
|
||||
<div className="p-4">
|
||||
<ul className="menu">
|
||||
<li className="menu-title">Exploration</li>
|
||||
<li><a><FaBuffer/>To Review</a></li>
|
||||
<li><Link to="/dashboard"><FaBuffer/>Dashboard</Link></li>
|
||||
<li><Link to="/documents"><FaBuffer/>To Review</Link></li>
|
||||
<li className="menu-title mt-4">Catégories</li>
|
||||
<li><a><i className="fas fa-plus"></i>Item</a></li>
|
||||
</ul>
|
||||
|
||||
68
src/frontend/src/components/documents/DeleteConfirmModal.jsx
Normal file
68
src/frontend/src/components/documents/DeleteConfirmModal.jsx
Normal file
@@ -0,0 +1,68 @@
|
||||
/**
|
||||
* DeleteConfirmModal Component
|
||||
* Modal dialog to confirm document deletion
|
||||
*/
|
||||
|
||||
import React from 'react';
|
||||
|
||||
/**
|
||||
* DeleteConfirmModal component
|
||||
* @param {Object} props
|
||||
* @param {boolean} props.isOpen - Whether the modal is open
|
||||
* @param {Object|null} props.document - Document to delete
|
||||
* @param {function(): void} props.onClose - Callback when modal is closed
|
||||
* @param {function(): void} props.onConfirm - Callback when deletion is confirmed
|
||||
* @param {boolean} props.isDeleting - Whether deletion is in progress
|
||||
* @returns {JSX.Element}
|
||||
*/
|
||||
const DeleteConfirmModal = ({
|
||||
isOpen,
|
||||
document,
|
||||
onClose,
|
||||
onConfirm,
|
||||
isDeleting = false
|
||||
}) => {
|
||||
if (!isOpen || !document) return null;
|
||||
|
||||
return (
|
||||
<dialog className="modal modal-open">
|
||||
<div className="modal-box">
|
||||
<h3 className="font-bold text-lg">Confirm Deletion</h3>
|
||||
<p className="py-4">
|
||||
Are you sure you want to delete <span className="font-semibold">"{document.name}"</span>?
|
||||
</p>
|
||||
<p className="text-sm text-gray-500">
|
||||
This action cannot be undone.
|
||||
</p>
|
||||
<div className="modal-action">
|
||||
<button
|
||||
className="btn btn-ghost"
|
||||
onClick={onClose}
|
||||
disabled={isDeleting}
|
||||
>
|
||||
Cancel
|
||||
</button>
|
||||
<button
|
||||
className="btn btn-error"
|
||||
onClick={onConfirm}
|
||||
disabled={isDeleting}
|
||||
>
|
||||
{isDeleting ? (
|
||||
<>
|
||||
<span className="loading loading-spinner loading-sm"></span>
|
||||
Deleting...
|
||||
</>
|
||||
) : (
|
||||
'Delete'
|
||||
)}
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
<form method="dialog" className="modal-backdrop" onClick={onClose}>
|
||||
<button disabled={isDeleting}>close</button>
|
||||
</form>
|
||||
</dialog>
|
||||
);
|
||||
};
|
||||
|
||||
export default DeleteConfirmModal;
|
||||
193
src/frontend/src/components/documents/DocumentCard.jsx
Normal file
193
src/frontend/src/components/documents/DocumentCard.jsx
Normal file
@@ -0,0 +1,193 @@
|
||||
/**
|
||||
* DocumentCard Component
|
||||
* Displays a document as a DaisyUI card with thumbnail and metadata
|
||||
* Supports different view modes: small, large, and detail
|
||||
*/
|
||||
|
||||
import React, { memo } from 'react';
|
||||
|
||||
/**
 * Formats file size to human-readable format
 * Uses binary units (1 KB = 1024 Bytes) and rounds to two decimals.
 * Missing, non-numeric, zero or negative input is reported as '0 Bytes';
 * values beyond the largest known unit are expressed in that unit.
 *
 * @param {number} bytes - File size in bytes
 * @returns {string} Formatted file size
 */
const formatFileSize = (bytes) => {
  const value = Number(bytes);
  if (!Number.isFinite(value) || value <= 0) return '0 Bytes';

  const k = 1024;
  const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB'];
  const rawIndex = Math.floor(Math.log(value) / Math.log(k));
  // Clamp so sub-byte values (negative log) and values past the last unit
  // never index outside `sizes`.
  const i = Math.min(Math.max(rawIndex, 0), sizes.length - 1);

  return Math.round((value / Math.pow(k, i)) * 100) / 100 + ' ' + sizes[i];
};
|
||||
|
||||
/**
 * Formats date to localized string
 * Produces an en-US short date such as "Mar 15, 2024".
 *
 * @param {string} dateString - ISO date string
 * @returns {string} Formatted date
 */
const formatDate = (dateString) => {
  const shortDateOptions = { year: 'numeric', month: 'short', day: 'numeric' };
  const parsed = new Date(dateString);
  return parsed.toLocaleDateString('en-US', shortDateOptions);
};
|
||||
|
||||
/**
|
||||
* DocumentCard component
|
||||
* @param {Object} props
|
||||
* @param {Object} props.document - Document object
|
||||
* @param {'small'|'large'|'detail'} props.viewMode - Current view mode
|
||||
* @param {function(): void} props.onEdit - Callback when edit is clicked
|
||||
* @param {function(): void} props.onDelete - Callback when delete is clicked
|
||||
* @returns {JSX.Element}
|
||||
*/
|
||||
const DocumentCard = memo(({ document, viewMode, onEdit, onDelete }) => {
|
||||
const { name, originalFileType, thumbnailUrl, pageCount, fileSize, createdAt, tags, categories } = document;
|
||||
|
||||
// Determine card classes based on view mode
|
||||
const getCardClasses = () => {
|
||||
const baseClasses = 'card bg-base-100 shadow-xl hover:shadow-2xl transition-shadow group relative';
|
||||
|
||||
switch (viewMode) {
|
||||
case 'small':
|
||||
return `${baseClasses} w-full`;
|
||||
case 'large':
|
||||
return `${baseClasses} w-full`;
|
||||
case 'detail':
|
||||
return `${baseClasses} w-full`;
|
||||
default:
|
||||
return baseClasses;
|
||||
}
|
||||
};
|
||||
|
||||
// Render thumbnail with hover actions
|
||||
const renderThumbnail = () => (
|
||||
<figure className="relative overflow-hidden">
|
||||
<img
|
||||
src={`http://localhost:8000${thumbnailUrl}`}
|
||||
alt={`${thumbnailUrl} thumbnail`}
|
||||
className={`w-full object-cover ${
|
||||
viewMode === 'small' ? 'h-32' : viewMode === 'large' ? 'h-48' : 'h-64'
|
||||
}`}
|
||||
loading="lazy"
|
||||
/>
|
||||
|
||||
{/* Hover overlay with actions */}
|
||||
<div className="absolute top-2 right-2 flex gap-2 opacity-0 group-hover:opacity-100 transition-opacity">
|
||||
<button
|
||||
className="btn btn-sm btn-circle btn-primary"
|
||||
onClick={onEdit}
|
||||
aria-label="Edit document"
|
||||
title="Edit"
|
||||
>
|
||||
<svg xmlns="http://www.w3.org/2000/svg" className="h-4 w-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M11 5H6a2 2 0 00-2 2v11a2 2 0 002 2h11a2 2 0 002-2v-5m-1.414-9.414a2 2 0 112.828 2.828L11.828 15H9v-2.828l8.586-8.586z" />
|
||||
</svg>
|
||||
</button>
|
||||
<button
|
||||
className="btn btn-sm btn-circle btn-error"
|
||||
onClick={onDelete}
|
||||
aria-label="Delete document"
|
||||
title="Delete"
|
||||
>
|
||||
<svg xmlns="http://www.w3.org/2000/svg" className="h-4 w-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16" />
|
||||
</svg>
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* File type badge */}
|
||||
<div className="absolute bottom-2 left-2">
|
||||
<span className="badge badge-accent badge-sm">{originalFileType}</span>
|
||||
</div>
|
||||
</figure>
|
||||
);
|
||||
|
||||
// Render card body based on view mode
|
||||
const renderCardBody = () => {
|
||||
if (viewMode === 'small') {
|
||||
return (
|
||||
<div className="card-body p-3">
|
||||
<h3 className="card-title text-sm truncate" title={name}>{name}</h3>
|
||||
<p className="text-xs text-gray-500">{pageCount} page{pageCount > 1 ? 's' : ''}</p>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
if (viewMode === 'large') {
|
||||
return (
|
||||
<div className="card-body p-4">
|
||||
<h3 className="card-title text-base truncate" title={name}>{name}</h3>
|
||||
<div className="flex flex-wrap gap-1 mb-2">
|
||||
{tags.slice(0, 3).map(tag => (
|
||||
<span key={tag} className="badge badge-primary badge-xs">{tag}</span>
|
||||
))}
|
||||
{tags.length > 3 && (
|
||||
<span className="badge badge-ghost badge-xs">+{tags.length - 3}</span>
|
||||
)}
|
||||
</div>
|
||||
<div className="text-sm space-y-1">
|
||||
<p className="text-gray-500">{pageCount} page{pageCount > 1 ? 's' : ''}</p>
|
||||
<p className="text-gray-500">{formatFileSize(fileSize)}</p>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
// Detail mode
|
||||
return (
|
||||
<div className="card-body">
|
||||
<h3 className="card-title text-lg" title={name}>{name}</h3>
|
||||
|
||||
{/* Tags */}
|
||||
{tags.length > 0 && (
|
||||
<div className="flex flex-wrap gap-1 mb-2">
|
||||
{tags.map(tag => (
|
||||
<span key={tag} className="badge badge-primary badge-sm">{tag}</span>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Categories */}
|
||||
{categories.length > 0 && (
|
||||
<div className="flex flex-wrap gap-1 mb-3">
|
||||
{categories.map(category => (
|
||||
<span key={category} className="badge badge-secondary badge-sm">{category}</span>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Metadata */}
|
||||
<div className="grid grid-cols-2 gap-2 text-sm">
|
||||
<div>
|
||||
<span className="font-semibold">Pages:</span>
|
||||
<span className="ml-2 text-gray-500">{pageCount}</span>
|
||||
</div>
|
||||
<div>
|
||||
<span className="font-semibold">Size:</span>
|
||||
<span className="ml-2 text-gray-500">{formatFileSize(fileSize)}</span>
|
||||
</div>
|
||||
<div>
|
||||
<span className="font-semibold">Type:</span>
|
||||
<span className="ml-2 text-gray-500">{originalFileType}</span>
|
||||
</div>
|
||||
<div>
|
||||
<span className="font-semibold">Date:</span>
|
||||
<span className="ml-2 text-gray-500">{formatDate(createdAt)}</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
return (
|
||||
<div className={getCardClasses()}>
|
||||
{renderThumbnail()}
|
||||
{renderCardBody()}
|
||||
</div>
|
||||
);
|
||||
});
|
||||
|
||||
DocumentCard.displayName = 'DocumentCard';
|
||||
|
||||
export default DocumentCard;
|
||||
164
src/frontend/src/components/documents/DocumentDetailView.jsx
Normal file
164
src/frontend/src/components/documents/DocumentDetailView.jsx
Normal file
@@ -0,0 +1,164 @@
|
||||
/**
|
||||
* DocumentDetailView Component
|
||||
* Displays a document in detail mode with all pages visible
|
||||
* This is a placeholder that shows multiple page thumbnails
|
||||
* When real PDF backend is ready, this can be replaced with actual PDF rendering
|
||||
*/
|
||||
|
||||
import React from 'react';
|
||||
|
||||
/**
 * Formats file size to human-readable format
 * Uses binary units (1 KB = 1024 Bytes) and rounds to two decimals.
 * Missing, non-numeric, zero or negative input is reported as '0 Bytes';
 * values beyond the largest known unit are expressed in that unit.
 *
 * @param {number} bytes - File size in bytes
 * @returns {string} Formatted file size
 */
const formatFileSize = (bytes) => {
  const value = Number(bytes);
  if (!Number.isFinite(value) || value <= 0) return '0 Bytes';

  const k = 1024;
  const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB'];
  const rawIndex = Math.floor(Math.log(value) / Math.log(k));
  // Clamp so sub-byte values (negative log) and values past the last unit
  // never index outside `sizes`.
  const i = Math.min(Math.max(rawIndex, 0), sizes.length - 1);

  return Math.round((value / Math.pow(k, i)) * 100) / 100 + ' ' + sizes[i];
};
|
||||
|
||||
/**
 * Formats date to localized string
 * Produces an en-US long date including hour and minute.
 *
 * @param {string} dateString - ISO date string
 * @returns {string} Formatted date
 */
const formatDate = (dateString) => {
  const longDateTimeOptions = {
    year: 'numeric',
    month: 'long',
    day: 'numeric',
    hour: '2-digit',
    minute: '2-digit'
  };
  const parsed = new Date(dateString);
  return parsed.toLocaleDateString('en-US', longDateTimeOptions);
};
|
||||
|
||||
/**
|
||||
* DocumentDetailView component
|
||||
* @param {Object} props
|
||||
* @param {Object} props.document - Document object
|
||||
* @param {function(): void} props.onEdit - Callback when edit is clicked
|
||||
* @param {function(): void} props.onDelete - Callback when delete is clicked
|
||||
* @returns {JSX.Element}
|
||||
*/
|
||||
const DocumentDetailView = ({ document, onEdit, onDelete }) => {
|
||||
const {
|
||||
name,
|
||||
originalFileType,
|
||||
thumbnailUrl,
|
||||
pageCount,
|
||||
fileSize,
|
||||
createdAt,
|
||||
tags,
|
||||
categories
|
||||
} = document;
|
||||
|
||||
// Generate placeholder pages (in real implementation, these would be actual PDF pages)
|
||||
const pages = Array.from({ length: pageCount }, (_, i) => ({
|
||||
pageNumber: i + 1,
|
||||
thumbnailUrl: thumbnailUrl.replace('Page+1', `Page+${i + 1}`)
|
||||
}));
|
||||
|
||||
return (
|
||||
<div className="card bg-base-100 shadow-xl">
|
||||
{/* Header with actions */}
|
||||
<div className="card-body">
|
||||
<div className="flex justify-between items-start mb-4">
|
||||
<div className="flex-1">
|
||||
<h2 className="card-title text-2xl mb-2">{name}</h2>
|
||||
|
||||
{/* Tags */}
|
||||
{tags.length > 0 && (
|
||||
<div className="flex flex-wrap gap-2 mb-2">
|
||||
<span className="text-sm font-semibold text-gray-600">Tags:</span>
|
||||
{tags.map(tag => (
|
||||
<span key={tag} className="badge badge-primary">{tag}</span>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Categories */}
|
||||
{categories.length > 0 && (
|
||||
<div className="flex flex-wrap gap-2 mb-3">
|
||||
<span className="text-sm font-semibold text-gray-600">Categories:</span>
|
||||
{categories.map(category => (
|
||||
<span key={category} className="badge badge-secondary">{category}</span>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Action buttons */}
|
||||
<div className="flex gap-2">
|
||||
<button
|
||||
className="btn btn-primary btn-sm"
|
||||
onClick={onEdit}
|
||||
aria-label="Edit document"
|
||||
>
|
||||
<svg xmlns="http://www.w3.org/2000/svg" className="h-4 w-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M11 5H6a2 2 0 00-2 2v11a2 2 0 002 2h11a2 2 0 002-2v-5m-1.414-9.414a2 2 0 112.828 2.828L11.828 15H9v-2.828l8.586-8.586z" />
|
||||
</svg>
|
||||
Edit
|
||||
</button>
|
||||
<button
|
||||
className="btn btn-error btn-sm"
|
||||
onClick={onDelete}
|
||||
aria-label="Delete document"
|
||||
>
|
||||
<svg xmlns="http://www.w3.org/2000/svg" className="h-4 w-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16" />
|
||||
</svg>
|
||||
Delete
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Metadata grid */}
|
||||
<div className="grid grid-cols-2 md:grid-cols-4 gap-4 mb-6 p-4 bg-base-200 rounded-lg">
|
||||
<div>
|
||||
<span className="text-sm font-semibold text-gray-600">Original Type</span>
|
||||
<p className="text-lg">{originalFileType}</p>
|
||||
</div>
|
||||
<div>
|
||||
<span className="text-sm font-semibold text-gray-600">Pages</span>
|
||||
<p className="text-lg">{pageCount}</p>
|
||||
</div>
|
||||
<div>
|
||||
<span className="text-sm font-semibold text-gray-600">File Size</span>
|
||||
<p className="text-lg">{formatFileSize(fileSize)}</p>
|
||||
</div>
|
||||
<div>
|
||||
<span className="text-sm font-semibold text-gray-600">Created</span>
|
||||
<p className="text-lg">{formatDate(createdAt)}</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Pages preview */}
|
||||
<div>
|
||||
<h3 className="text-lg font-semibold mb-4">Document Pages ({pageCount})</h3>
|
||||
<div className="grid grid-cols-2 md:grid-cols-3 lg:grid-cols-4 gap-4">
|
||||
{pages.map((page) => (
|
||||
<div key={page.pageNumber} className="relative group">
|
||||
<div className="aspect-[3/4] bg-base-200 rounded-lg overflow-hidden shadow-md hover:shadow-xl transition-shadow">
|
||||
<img
|
||||
src={page.thumbnailUrl}
|
||||
alt={`Page ${page.pageNumber}`}
|
||||
className="w-full h-full object-cover"
|
||||
loading="lazy"
|
||||
/>
|
||||
</div>
|
||||
<div className="text-center mt-2">
|
||||
<span className="text-sm text-gray-600">Page {page.pageNumber}</span>
|
||||
</div>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
export default DocumentDetailView;
|
||||
180
src/frontend/src/components/documents/DocumentGallery.jsx
Normal file
180
src/frontend/src/components/documents/DocumentGallery.jsx
Normal file
@@ -0,0 +1,180 @@
|
||||
/**
|
||||
* DocumentGallery Component
|
||||
* Main container for displaying documents in different view modes
|
||||
*/
|
||||
|
||||
import React, { useState } from 'react';
|
||||
import DocumentCard from './DocumentCard';
|
||||
import DocumentDetailView from './DocumentDetailView';
|
||||
import ViewModeSwitcher from './ViewModeSwitcher';
|
||||
import EditDocumentModal from './EditDocumentModal';
|
||||
import DeleteConfirmModal from './DeleteConfirmModal';
|
||||
import { useDocuments } from '../../hooks/useDocuments';
|
||||
|
||||
/**
|
||||
* DocumentGallery component
|
||||
* @returns {JSX.Element}
|
||||
*/
|
||||
const DocumentGallery = () => {
|
||||
const { documents, loading, error, updateDocument, deleteDocument } = useDocuments();
|
||||
const [viewMode, setViewMode] = useState('large');
|
||||
const [editingDocument, setEditingDocument] = useState(null);
|
||||
const [deletingDocument, setDeletingDocument] = useState(null);
|
||||
const [isSaving, setIsSaving] = useState(false);
|
||||
const [isDeleting, setIsDeleting] = useState(false);
|
||||
|
||||
/**
|
||||
* Handles opening the edit modal
|
||||
* @param {Object} document - Document to edit
|
||||
*/
|
||||
const handleEditClick = (document) => {
|
||||
setEditingDocument(document);
|
||||
};
|
||||
|
||||
/**
|
||||
* Handles opening the delete confirmation modal
|
||||
* @param {Object} document - Document to delete
|
||||
*/
|
||||
const handleDeleteClick = (document) => {
|
||||
setDeletingDocument(document);
|
||||
};
|
||||
|
||||
/**
|
||||
* Handles saving document changes
|
||||
* @param {Object} updates - Updates object with tags and categories
|
||||
*/
|
||||
const handleSaveEdit = async (updates) => {
|
||||
if (!editingDocument) return;
|
||||
|
||||
setIsSaving(true);
|
||||
const success = await updateDocument(editingDocument.id, updates);
|
||||
setIsSaving(false);
|
||||
|
||||
if (success) {
|
||||
setEditingDocument(null);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Handles confirming document deletion
|
||||
*/
|
||||
const handleConfirmDelete = async () => {
|
||||
if (!deletingDocument) return;
|
||||
|
||||
setIsDeleting(true);
|
||||
const success = await deleteDocument(deletingDocument.id);
|
||||
setIsDeleting(false);
|
||||
|
||||
if (success) {
|
||||
setDeletingDocument(null);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Gets grid classes based on view mode
|
||||
* @returns {string} Tailwind CSS classes
|
||||
*/
|
||||
const getGridClasses = () => {
|
||||
switch (viewMode) {
|
||||
case 'small':
|
||||
return 'grid grid-cols-2 sm:grid-cols-3 md:grid-cols-4 lg:grid-cols-5 xl:grid-cols-6 gap-4';
|
||||
case 'large':
|
||||
return 'grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-3 gap-6';
|
||||
case 'detail':
|
||||
return 'flex flex-col gap-6';
|
||||
default:
|
||||
return 'grid grid-cols-1 gap-4';
|
||||
}
|
||||
};
|
||||
|
||||
// Loading state
|
||||
if (loading) {
|
||||
return (
|
||||
<div className="flex justify-center items-center min-h-[400px]">
|
||||
<span className="loading loading-spinner loading-lg"></span>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
// Error state
|
||||
if (error) {
|
||||
return (
|
||||
<div className="alert alert-error">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" className="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24">
|
||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth="2" d="M10 14l2-2m0 0l2-2m-2 2l-2-2m2 2l2 2m7-2a9 9 0 11-18 0 9 9 0 0118 0z" />
|
||||
</svg>
|
||||
<span>Error loading documents: {error}</span>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
// Empty state
|
||||
if (documents.length === 0) {
|
||||
return (
|
||||
<div className="flex flex-col items-center justify-center min-h-[400px] text-center">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" className="h-24 w-24 text-gray-300 mb-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={1.5} d="M9 12h6m-6 4h6m2 5H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z" />
|
||||
</svg>
|
||||
<h3 className="text-xl font-semibold mb-2">No documents yet</h3>
|
||||
<p className="text-gray-500">Upload your first document to get started</p>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
<div>
|
||||
{/* Header with view mode switcher */}
|
||||
<div className="flex justify-between items-center mb-6">
|
||||
<div>
|
||||
<h2 className="text-2xl font-bold">Documents</h2>
|
||||
<p className="text-gray-500">{documents.length} document{documents.length !== 1 ? 's' : ''}</p>
|
||||
</div>
|
||||
<ViewModeSwitcher
|
||||
currentMode={viewMode}
|
||||
onModeChange={setViewMode}
|
||||
/>
|
||||
</div>
|
||||
|
||||
{/* Document grid/list */}
|
||||
<div className={getGridClasses()}>
|
||||
{documents.map(document => (
|
||||
viewMode === 'detail' ? (
|
||||
<DocumentDetailView
|
||||
key={document.id}
|
||||
document={document}
|
||||
onEdit={() => handleEditClick(document)}
|
||||
onDelete={() => handleDeleteClick(document)}
|
||||
/>
|
||||
) : (
|
||||
<DocumentCard
|
||||
key={document.id}
|
||||
document={document}
|
||||
viewMode={viewMode}
|
||||
onEdit={() => handleEditClick(document)}
|
||||
onDelete={() => handleDeleteClick(document)}
|
||||
/>
|
||||
)
|
||||
))}
|
||||
</div>
|
||||
|
||||
{/* Modals */}
|
||||
<EditDocumentModal
|
||||
isOpen={!!editingDocument}
|
||||
document={editingDocument}
|
||||
onClose={() => setEditingDocument(null)}
|
||||
onSave={handleSaveEdit}
|
||||
isSaving={isSaving}
|
||||
/>
|
||||
|
||||
<DeleteConfirmModal
|
||||
isOpen={!!deletingDocument}
|
||||
document={deletingDocument}
|
||||
onClose={() => setDeletingDocument(null)}
|
||||
onConfirm={handleConfirmDelete}
|
||||
isDeleting={isDeleting}
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
export default DocumentGallery;
|
||||
225
src/frontend/src/components/documents/EditDocumentModal.jsx
Normal file
225
src/frontend/src/components/documents/EditDocumentModal.jsx
Normal file
@@ -0,0 +1,225 @@
|
||||
/**
|
||||
* EditDocumentModal Component
|
||||
* Modal dialog for editing document tags and categories
|
||||
*/
|
||||
|
||||
import React, { useState, useEffect } from 'react';
|
||||
import { getAvailableTags, getAvailableCategories } from '../../services/documentService';
|
||||
|
||||
/**
|
||||
* EditDocumentModal component
|
||||
* @param {Object} props
|
||||
* @param {boolean} props.isOpen - Whether the modal is open
|
||||
* @param {Object|null} props.document - Document to edit
|
||||
* @param {function(): void} props.onClose - Callback when modal is closed
|
||||
* @param {function(Object): void} props.onSave - Callback when changes are saved
|
||||
* @param {boolean} props.isSaving - Whether save is in progress
|
||||
* @returns {JSX.Element}
|
||||
*/
|
||||
const EditDocumentModal = ({
|
||||
isOpen,
|
||||
document,
|
||||
onClose,
|
||||
onSave,
|
||||
isSaving = false
|
||||
}) => {
|
||||
const [selectedTags, setSelectedTags] = useState([]);
|
||||
const [selectedCategories, setSelectedCategories] = useState([]);
|
||||
const [availableTags, setAvailableTags] = useState([]);
|
||||
const [availableCategories, setAvailableCategories] = useState([]);
|
||||
const [newTag, setNewTag] = useState('');
|
||||
const [newCategory, setNewCategory] = useState('');
|
||||
|
||||
// Load available tags and categories
|
||||
useEffect(() => {
|
||||
const loadOptions = async () => {
|
||||
const [tags, categories] = await Promise.all([
|
||||
getAvailableTags(),
|
||||
getAvailableCategories()
|
||||
]);
|
||||
setAvailableTags(tags);
|
||||
setAvailableCategories(categories);
|
||||
};
|
||||
loadOptions();
|
||||
}, []);
|
||||
|
||||
// Initialize selected values when document changes
|
||||
useEffect(() => {
|
||||
if (document) {
|
||||
setSelectedTags(document.tags || []);
|
||||
setSelectedCategories(document.categories || []);
|
||||
}
|
||||
}, [document]);
|
||||
|
||||
const handleAddTag = (tag) => {
|
||||
if (tag && !selectedTags.includes(tag)) {
|
||||
setSelectedTags([...selectedTags, tag]);
|
||||
}
|
||||
setNewTag('');
|
||||
};
|
||||
|
||||
const handleRemoveTag = (tag) => {
|
||||
setSelectedTags(selectedTags.filter(t => t !== tag));
|
||||
};
|
||||
|
||||
const handleAddCategory = (category) => {
|
||||
if (category && !selectedCategories.includes(category)) {
|
||||
setSelectedCategories([...selectedCategories, category]);
|
||||
}
|
||||
setNewCategory('');
|
||||
};
|
||||
|
||||
const handleRemoveCategory = (category) => {
|
||||
setSelectedCategories(selectedCategories.filter(c => c !== category));
|
||||
};
|
||||
|
||||
const handleSave = () => {
|
||||
onSave({
|
||||
tags: selectedTags,
|
||||
categories: selectedCategories
|
||||
});
|
||||
};
|
||||
|
||||
if (!isOpen || !document) return null;
|
||||
|
||||
return (
|
||||
<dialog className="modal modal-open">
|
||||
<div className="modal-box max-w-2xl">
|
||||
<h3 className="font-bold text-lg mb-4">Edit Document</h3>
|
||||
|
||||
<div className="mb-4">
|
||||
<p className="text-sm text-gray-500">
|
||||
Document: <span className="font-semibold">{document.name}</span>
|
||||
</p>
|
||||
</div>
|
||||
|
||||
{/* Tags Section */}
|
||||
<div className="mb-6">
|
||||
<label className="label">
|
||||
<span className="label-text font-semibold">Tags</span>
|
||||
</label>
|
||||
|
||||
{/* Selected Tags */}
|
||||
<div className="flex flex-wrap gap-2 mb-3">
|
||||
{selectedTags.map(tag => (
|
||||
<div key={tag} className="badge badge-primary gap-2">
|
||||
{tag}
|
||||
<button
|
||||
type="button"
|
||||
className="btn btn-ghost btn-xs"
|
||||
onClick={() => handleRemoveTag(tag)}
|
||||
disabled={isSaving}
|
||||
>
|
||||
✕
|
||||
</button>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
|
||||
{/* Add Tag */}
|
||||
<div className="flex gap-2">
|
||||
<select
|
||||
className="select select-bordered flex-1"
|
||||
value={newTag}
|
||||
onChange={(e) => setNewTag(e.target.value)}
|
||||
disabled={isSaving}
|
||||
>
|
||||
<option value="">Select a tag...</option>
|
||||
{availableTags
|
||||
.filter(tag => !selectedTags.includes(tag))
|
||||
.map(tag => (
|
||||
<option key={tag} value={tag}>{tag}</option>
|
||||
))
|
||||
}
|
||||
</select>
|
||||
<button
|
||||
className="btn btn-primary"
|
||||
onClick={() => handleAddTag(newTag)}
|
||||
disabled={!newTag || isSaving}
|
||||
>
|
||||
Add
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Categories Section */}
|
||||
<div className="mb-6">
|
||||
<label className="label">
|
||||
<span className="label-text font-semibold">Categories</span>
|
||||
</label>
|
||||
|
||||
{/* Selected Categories */}
|
||||
<div className="flex flex-wrap gap-2 mb-3">
|
||||
{selectedCategories.map(category => (
|
||||
<div key={category} className="badge badge-secondary gap-2">
|
||||
{category}
|
||||
<button
|
||||
type="button"
|
||||
className="btn btn-ghost btn-xs"
|
||||
onClick={() => handleRemoveCategory(category)}
|
||||
disabled={isSaving}
|
||||
>
|
||||
✕
|
||||
</button>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
|
||||
{/* Add Category */}
|
||||
<div className="flex gap-2">
|
||||
<select
|
||||
className="select select-bordered flex-1"
|
||||
value={newCategory}
|
||||
onChange={(e) => setNewCategory(e.target.value)}
|
||||
disabled={isSaving}
|
||||
>
|
||||
<option value="">Select a category...</option>
|
||||
{availableCategories
|
||||
.filter(cat => !selectedCategories.includes(cat))
|
||||
.map(cat => (
|
||||
<option key={cat} value={cat}>{cat}</option>
|
||||
))
|
||||
}
|
||||
</select>
|
||||
<button
|
||||
className="btn btn-secondary"
|
||||
onClick={() => handleAddCategory(newCategory)}
|
||||
disabled={!newCategory || isSaving}
|
||||
>
|
||||
Add
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="modal-action">
|
||||
<button
|
||||
className="btn btn-ghost"
|
||||
onClick={onClose}
|
||||
disabled={isSaving}
|
||||
>
|
||||
Cancel
|
||||
</button>
|
||||
<button
|
||||
className="btn btn-primary"
|
||||
onClick={handleSave}
|
||||
disabled={isSaving}
|
||||
>
|
||||
{isSaving ? (
|
||||
<>
|
||||
<span className="loading loading-spinner loading-sm"></span>
|
||||
Saving...
|
||||
</>
|
||||
) : (
|
||||
'Save Changes'
|
||||
)}
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
<form method="dialog" className="modal-backdrop" onClick={onClose}>
|
||||
<button disabled={isSaving}>close</button>
|
||||
</form>
|
||||
</dialog>
|
||||
);
|
||||
};
|
||||
|
||||
export default EditDocumentModal;
|
||||
46
src/frontend/src/components/documents/ViewModeSwitcher.jsx
Normal file
46
src/frontend/src/components/documents/ViewModeSwitcher.jsx
Normal file
@@ -0,0 +1,46 @@
|
||||
/**
|
||||
* ViewModeSwitcher Component
|
||||
* Allows users to switch between different view modes (small, large, detail)
|
||||
*/
|
||||
|
||||
import React from 'react';
|
||||
|
||||
/**
|
||||
* @typedef {'small' | 'large' | 'detail'} ViewMode
|
||||
*/
|
||||
|
||||
/**
|
||||
* ViewModeSwitcher component
|
||||
* @param {Object} props
|
||||
* @param {ViewMode} props.currentMode - Current active view mode
|
||||
* @param {function(ViewMode): void} props.onModeChange - Callback when mode changes
|
||||
* @returns {JSX.Element}
|
||||
*/
|
||||
const ViewModeSwitcher = ({ currentMode, onModeChange }) => {
|
||||
const modes = [
|
||||
{ id: 'small', label: 'Small', icon: '⊞' },
|
||||
{ id: 'large', label: 'Large', icon: '⊡' },
|
||||
{ id: 'detail', label: 'Detail', icon: '☰' }
|
||||
];
|
||||
|
||||
return (
|
||||
<div className="flex gap-2">
|
||||
{modes.map(mode => (
|
||||
<button
|
||||
key={mode.id}
|
||||
onClick={() => onModeChange(mode.id)}
|
||||
className={`btn btn-sm ${
|
||||
currentMode === mode.id ? 'btn-primary' : 'btn-ghost'
|
||||
}`}
|
||||
aria-label={`Switch to ${mode.label} view`}
|
||||
title={`${mode.label} view`}
|
||||
>
|
||||
<span className="text-lg">{mode.icon}</span>
|
||||
<span className="hidden sm:inline ml-1">{mode.label}</span>
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
export default ViewModeSwitcher;
|
||||
85
src/frontend/src/hooks/useDocuments.js
Normal file
85
src/frontend/src/hooks/useDocuments.js
Normal file
@@ -0,0 +1,85 @@
|
||||
/**
|
||||
* Custom hook for managing documents
|
||||
* Handles fetching, updating, and deleting documents
|
||||
*/
|
||||
|
||||
import { useState, useEffect, useCallback } from 'react';
|
||||
import * as documentService from '../services/documentService';
|
||||
|
||||
/**
 * Hook for managing documents state and operations
 * Loads the document list on mount and exposes update/delete helpers that
 * keep the local list in sync. Failures are logged, stored in `error`
 * (as the error message) and reported to callers as `false`.
 *
 * @returns {Object} `{ documents, loading, error, fetchDocuments, updateDocument, deleteDocument }`
 */
export const useDocuments = () => {
  const [documents, setDocuments] = useState([]);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState(null);

  /**
   * Fetches all documents from the service
   */
  const fetchDocuments = useCallback(async () => {
    try {
      setLoading(true);
      setError(null);
      const data = await documentService.getAllDocuments();
      // Consumers call documents.length / documents.map, so anything other
      // than an array (e.g. undefined from a swallowed API error) is treated
      // as a load failure rather than stored.
      if (!Array.isArray(data)) {
        throw new Error('Unexpected response while loading documents');
      }
      setDocuments(data);
    } catch (err) {
      setError(err.message);
      console.error('Error fetching documents:', err);
    } finally {
      setLoading(false);
    }
  }, []);

  /**
   * Updates a document's tags and categories
   * @param {string} id - Document ID
   * @param {Object} updates - Updates object
   * @returns {Promise<boolean>} Success status
   */
  const updateDocument = useCallback(async (id, updates) => {
    try {
      const updatedDoc = await documentService.updateDocument(id, updates);
      // Swap in the object returned by the service for the matching entry.
      setDocuments(prevDocs =>
        prevDocs.map(doc => (doc.id === id ? updatedDoc : doc))
      );
      return true;
    } catch (err) {
      setError(err.message);
      console.error('Error updating document:', err);
      return false;
    }
  }, []);

  /**
   * Deletes a document
   * @param {string} id - Document ID
   * @returns {Promise<boolean>} Success status
   */
  const deleteDocument = useCallback(async (id) => {
    try {
      await documentService.deleteDocument(id);
      setDocuments(prevDocs => prevDocs.filter(doc => doc.id !== id));
      return true;
    } catch (err) {
      setError(err.message);
      console.error('Error deleting document:', err);
      return false;
    }
  }, []);

  // Fetch documents on mount
  useEffect(() => {
    fetchDocuments();
  }, [fetchDocuments]);

  return {
    documents,
    loading,
    error,
    fetchDocuments,
    updateDocument,
    deleteDocument
  };
};
|
||||
21
src/frontend/src/pages/DocumentsPage.jsx
Normal file
21
src/frontend/src/pages/DocumentsPage.jsx
Normal file
@@ -0,0 +1,21 @@
|
||||
/**
|
||||
* DocumentsPage Component
|
||||
* Main page for displaying and managing documents
|
||||
*/
|
||||
|
||||
import React from 'react';
|
||||
import DocumentGallery from '../components/documents/DocumentGallery';
|
||||
|
||||
/**
|
||||
* DocumentsPage component
|
||||
* @returns {JSX.Element}
|
||||
*/
|
||||
const DocumentsPage = () => {
|
||||
return (
|
||||
<div className="container mx-auto px-4 py-8">
|
||||
<DocumentGallery />
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
export default DocumentsPage;
|
||||
97
src/frontend/src/services/documentService.js
Normal file
97
src/frontend/src/services/documentService.js
Normal file
@@ -0,0 +1,97 @@
|
||||
/**
|
||||
* Document Service
|
||||
* Handles all API calls related to documents
|
||||
* getAllDocuments calls the backend API; the other functions still use in-memory mock data for development
|
||||
*/
|
||||
|
||||
import { mockDocuments, availableTags, availableCategories } from '../utils/mockData';
|
||||
import api from '../utils/api';
|
||||
|
||||
// Returns a promise that resolves after `ms` milliseconds (simulated network delay)
const delay = (ms) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
|
||||
|
||||
/**
 * Fetches all documents from the API.
 *
 * When the request fails, the error is logged and a copy of the mock
 * document list is returned instead, so callers always receive an array.
 * @returns {Promise<Array>} Array of document objects
 */
export const getAllDocuments = async () => {
  try {
    const response = await api.get('/api/documents');
    return response.data;
  } catch (error) {
    console.error('Failed to fetch documents:', error);
    // Fallback to mock data in case of API error during development
    console.warn('Falling back to mock data');
    // Return a copy so callers cannot mutate the shared mock list.
    return [...mockDocuments];
  }
};
|
||||
|
||||
/**
 * Looks up one document in the mock list after a simulated 300 ms delay.
 * @param {string} id - Document ID
 * @returns {Promise<Object|null>} The matching document, or null when none matches
 */
export const getDocumentById = async (id) => {
  await delay(300);

  const match = mockDocuments.find((candidate) => candidate.id === id);
  if (match) {
    return match;
  }
  return null;
};
|
||||
|
||||
/**
 * Merges `updates` into a mock document after a simulated 400 ms delay.
 * The stored entry is replaced by a new object built from the existing
 * fields overlaid with the fields of `updates`.
 * @param {string} id - Document ID
 * @param {Object} updates - Fields to overwrite (typically tags and/or categories)
 * @param {Array<string>} updates.tags - New tags array
 * @param {Array<string>} updates.categories - New categories array
 * @returns {Promise<Object>} Updated document object
 * @throws {Error} 'Document not found' when no document has the given ID
 */
export const updateDocument = async (id, updates) => {
  await delay(400);

  const position = mockDocuments.findIndex((candidate) => candidate.id === id);
  if (position < 0) {
    throw new Error('Document not found');
  }

  const merged = { ...mockDocuments[position], ...updates };
  mockDocuments[position] = merged;
  return merged;
};
|
||||
|
||||
/**
 * Removes a document from the mock list after a simulated 300 ms delay.
 * @param {string} id - Document ID
 * @returns {Promise<boolean>} True if deletion was successful
 * @throws {Error} 'Document not found' when no document has the given ID
 */
export const deleteDocument = async (id) => {
  await delay(300);

  const position = mockDocuments.findIndex((candidate) => candidate.id === id);
  if (position < 0) {
    throw new Error('Document not found');
  }

  // Remove the entry in place so the shared mock list reflects the deletion.
  mockDocuments.splice(position, 1);
  return true;
};
|
||||
|
||||
/**
 * Returns a fresh copy of the available tag list after a simulated 200 ms delay.
 * @returns {Promise<Array<string>>} Array of tag strings
 */
export const getAvailableTags = async () => {
  await delay(200);
  const tagsCopy = Array.from(availableTags);
  return tagsCopy;
};
|
||||
|
||||
/**
 * Returns a fresh copy of the available category list after a simulated 200 ms delay.
 * @returns {Promise<Array<string>>} Array of category strings
 */
export const getAvailableCategories = async () => {
  await delay(200);
  const categoriesCopy = Array.from(availableCategories);
  return categoriesCopy;
};
|
||||
155
src/frontend/src/utils/mockData.js
Normal file
155
src/frontend/src/utils/mockData.js
Normal file
@@ -0,0 +1,155 @@
|
||||
/**
|
||||
* Mock data for PDF documents
|
||||
* This file provides sample data for development and testing purposes
|
||||
*/
|
||||
|
||||
/**
 * Builds a placeholder thumbnail URL whose background color is picked from a
 * fixed six-color palette, cycling by `index`.
 * @param {number} index - Document index used to select the color
 * @returns {string} Placeholder image URL
 */
const generateThumbnailUrl = (index) => {
  const palette = ['3B82F6', '10B981', 'F59E0B', 'EF4444', '8B5CF6', 'EC4899'];
  const background = palette[index % palette.length];
  return 'https://via.placeholder.com/300x400/' + background + '/FFFFFF?text=Page+1';
};
|
||||
|
||||
/**
|
||||
* Mock documents data
|
||||
* @type {Array<Object>}
|
||||
*/
|
||||
export const mockDocuments = [
|
||||
{
|
||||
id: 'doc-001',
|
||||
name: 'Contrat-2025.pdf',
|
||||
originalFileType: 'DOCX',
|
||||
createdAt: '2025-10-01T10:30:00Z',
|
||||
fileSize: 2048576, // 2 MB
|
||||
pageCount: 12,
|
||||
thumbnailUrl: generateThumbnailUrl(0),
|
||||
pdfUrl: '/mock/contrat-2025.pdf',
|
||||
tags: ['contrat', '2025'],
|
||||
categories: ['legal']
|
||||
},
|
||||
{
|
||||
id: 'doc-002',
|
||||
name: 'Facture-Janvier.pdf',
|
||||
originalFileType: 'XLSX',
|
||||
createdAt: '2025-09-15T14:20:00Z',
|
||||
fileSize: 512000, // 512 KB
|
||||
pageCount: 3,
|
||||
thumbnailUrl: generateThumbnailUrl(1),
|
||||
pdfUrl: '/mock/facture-janvier.pdf',
|
||||
tags: ['facture', 'comptabilité'],
|
||||
categories: ['finance']
|
||||
},
|
||||
{
|
||||
id: 'doc-003',
|
||||
name: 'Présentation-Projet.pdf',
|
||||
originalFileType: 'PPTX',
|
||||
createdAt: '2025-09-28T09:15:00Z',
|
||||
fileSize: 5242880, // 5 MB
|
||||
pageCount: 24,
|
||||
thumbnailUrl: generateThumbnailUrl(2),
|
||||
pdfUrl: '/mock/presentation-projet.pdf',
|
||||
tags: ['présentation', 'projet'],
|
||||
categories: ['marketing']
|
||||
},
|
||||
{
|
||||
id: 'doc-004',
|
||||
name: 'Photo-Identité.pdf',
|
||||
originalFileType: 'JPG',
|
||||
createdAt: '2025-10-05T16:45:00Z',
|
||||
fileSize: 204800, // 200 KB
|
||||
pageCount: 1,
|
||||
thumbnailUrl: generateThumbnailUrl(3),
|
||||
pdfUrl: '/mock/photo-identite.pdf',
|
||||
tags: ['photo', 'identité'],
|
||||
categories: ['personnel']
|
||||
},
|
||||
{
|
||||
id: 'doc-005',
|
||||
name: 'Manuel-Utilisateur.pdf',
|
||||
originalFileType: 'PDF',
|
||||
createdAt: '2025-09-20T11:00:00Z',
|
||||
fileSize: 3145728, // 3 MB
|
||||
pageCount: 45,
|
||||
thumbnailUrl: generateThumbnailUrl(4),
|
||||
pdfUrl: '/mock/manuel-utilisateur.pdf',
|
||||
tags: ['manuel', 'documentation'],
|
||||
categories: ['technique']
|
||||
},
|
||||
{
|
||||
id: 'doc-006',
|
||||
name: 'Rapport-Annuel.pdf',
|
||||
originalFileType: 'DOCX',
|
||||
createdAt: '2025-08-30T13:30:00Z',
|
||||
fileSize: 4194304, // 4 MB
|
||||
pageCount: 67,
|
||||
thumbnailUrl: generateThumbnailUrl(5),
|
||||
pdfUrl: '/mock/rapport-annuel.pdf',
|
||||
tags: ['rapport', 'annuel'],
|
||||
categories: ['finance', 'management']
|
||||
},
|
||||
{
|
||||
id: 'doc-007',
|
||||
name: 'CV-Candidat.pdf',
|
||||
originalFileType: 'DOCX',
|
||||
createdAt: '2025-10-02T08:00:00Z',
|
||||
fileSize: 153600, // 150 KB
|
||||
pageCount: 2,
|
||||
thumbnailUrl: generateThumbnailUrl(0),
|
||||
pdfUrl: '/mock/cv-candidat.pdf',
|
||||
tags: ['cv', 'recrutement'],
|
||||
categories: ['rh']
|
||||
},
|
||||
{
|
||||
id: 'doc-008',
|
||||
name: 'Devis-Travaux.pdf',
|
||||
originalFileType: 'XLSX',
|
||||
createdAt: '2025-09-25T15:20:00Z',
|
||||
fileSize: 409600, // 400 KB
|
||||
pageCount: 5,
|
||||
thumbnailUrl: generateThumbnailUrl(1),
|
||||
pdfUrl: '/mock/devis-travaux.pdf',
|
||||
tags: ['devis', 'travaux'],
|
||||
categories: ['finance']
|
||||
}
|
||||
];
|
||||
|
||||
/**
|
||||
* Available tags for documents
|
||||
* @type {Array<string>}
|
||||
*/
|
||||
export const availableTags = [
|
||||
'contrat',
|
||||
'facture',
|
||||
'présentation',
|
||||
'photo',
|
||||
'manuel',
|
||||
'rapport',
|
||||
'cv',
|
||||
'devis',
|
||||
'comptabilité',
|
||||
'projet',
|
||||
'identité',
|
||||
'documentation',
|
||||
'annuel',
|
||||
'recrutement',
|
||||
'travaux',
|
||||
'2025'
|
||||
];
|
||||
|
||||
/**
|
||||
* Available categories for documents
|
||||
* @type {Array<string>}
|
||||
*/
|
||||
export const availableCategories = [
|
||||
'legal',
|
||||
'finance',
|
||||
'marketing',
|
||||
'personnel',
|
||||
'technique',
|
||||
'management',
|
||||
'rh'
|
||||
];
|
||||
@@ -12,12 +12,17 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
texlive-xetex \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
|
||||
# Copy requirements and install dependencies
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Change the user
|
||||
USER 1002:1002
|
||||
|
||||
# Copy application code
|
||||
COPY . .
|
||||
|
||||
|
||||
# Command will be overridden by docker-compose
|
||||
CMD ["celery", "-A", "main", "worker", "--loglevel=info"]
|
||||
|
||||
@@ -5,10 +5,12 @@ email-validator==2.3.0
|
||||
fastapi==0.116.1
|
||||
httptools==0.6.4
|
||||
motor==3.7.1
|
||||
pikepdf==9.11.0
|
||||
pillow==11.3.0
|
||||
pydantic==2.11.9
|
||||
PyJWT==2.10.1
|
||||
pymongo==4.15.0
|
||||
PyMuPDF==1.26.4
|
||||
pypandoc==1.15
|
||||
python-multipart==0.0.20
|
||||
redis==6.4.0
|
||||
|
||||
@@ -1,22 +1,14 @@
|
||||
import subprocess
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
import magic # python-magic
|
||||
|
||||
from tasks.common.pdf_converter import TextToPdfConverter, ImageToPdfConverter, WordToPdfConverter
|
||||
|
||||
|
||||
class UnsupportedFileTypeError(Exception):
    """Signals that a file's type is not one of the supported types."""
|
||||
|
||||
|
||||
def generate_uuid_filename() -> str:
    """Return a newly generated UUID4 as a string, for use as a unique filename."""
    fresh_id = uuid.uuid4()
    return str(fresh_id)
|
||||
|
||||
|
||||
def detect_file_type(file_path: str) -> str:
|
||||
"""
|
||||
Detect the type of file using python-magic.
|
||||
@@ -28,12 +20,19 @@ def detect_file_type(file_path: str) -> str:
|
||||
UnsupportedFileTypeError: If file type is not supported.
|
||||
"""
|
||||
mime = magic.from_file(file_path, mime=True)
|
||||
extension = Path(file_path).suffix
|
||||
if mime.startswith("text/"):
|
||||
return "text"
|
||||
elif mime.startswith("image/"):
|
||||
return "image"
|
||||
elif mime in ("application/vnd.openxmlformats-officedocument.wordprocessingml.document",):
|
||||
return "word"
|
||||
elif mime == "application/pdf":
|
||||
return "pdf"
|
||||
elif mime == "application/vnd.ms-powerpoint":
|
||||
return "powerpoint"
|
||||
elif mime == "application/octet-stream" and extension in (".jpg", ".jpeg", ".png", ".gif"):
|
||||
return "image"
|
||||
else:
|
||||
raise UnsupportedFileTypeError(f"Unsupported file type: {mime}")
|
||||
|
||||
@@ -72,31 +71,3 @@ def compress_pdf(input_pdf: str, output_pdf: str, quality: str = "ebook") -> Non
|
||||
result = subprocess.run(cmd)
|
||||
if result.returncode != 0:
|
||||
raise RuntimeError(f"Ghostscript failed with return code {result.returncode}")
|
||||
|
||||
|
||||
def convert_to_pdf(filepath: str, output_dir: str = ".") -> str:
    """
    Convert a text, image or Word file to PDF.

    The file type is obtained from ``detect_file_type`` and mapped to the
    matching converter class, which performs the conversion.

    Args:
        filepath (str): Path to the input file.
        output_dir (str): Directory to save the output PDF.

    Returns:
        str: Path to the generated PDF.

    Raises:
        UnsupportedFileTypeError: Propagated from ``detect_file_type`` when the
            file type is not recognised.
        ValueError: If the detected type has no converter here (anything other
            than "text", "image" or "word").
    """
    file_type = detect_file_type(filepath)

    converters = {
        "text": TextToPdfConverter,
        "image": ImageToPdfConverter,
        "word": WordToPdfConverter,
    }
    converter_class = converters.get(file_type)
    if converter_class is None:
        raise ValueError(f"Unsupported file type: {file_type}")

    return converter_class(filepath, output_dir=output_dir).convert()
|
||||
|
||||
64
src/worker/tasks/common/document_utils.py
Normal file
64
src/worker/tasks/common/document_utils.py
Normal file
@@ -0,0 +1,64 @@
|
||||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from app.config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_file_hash(file_bytes: bytes) -> str:
    """
    Compute the SHA256 digest of the given content.

    Args:
        file_bytes: Raw file content as bytes

    Returns:
        The digest as a hexadecimal string
    """
    digest = hashlib.sha256()
    digest.update(file_bytes)
    return digest.hexdigest()
|
||||
|
||||
|
||||
def get_object_path(file_hash):
    """
    Build the storage path of the object identified by a hash.

    The object is placed under the objects folder from the settings, inside a
    sub-folder named after the first 24 characters of the hash.

    :param file_hash: hash string identifying the object
    :return: ``<objects folder>/<first 24 characters of the hash>/<hash>``
    """
    objects_root = settings.get_objects_folder()
    sub_folder = file_hash[:24]
    return os.path.join(objects_root, sub_folder, file_hash)
|
||||
|
||||
|
||||
def save_as_object(file_path, remove_on_success=True) -> str:
    """
    Read the file, get the hash and save using the hash as the filename.

    The whole file is read into memory, hashed with ``get_file_hash`` and
    written to the location given by ``get_object_path``. If an object with
    the same hash already exists, nothing is written and the source file is
    left in place, whatever the value of ``remove_on_success``.

    :param file_path: path of the file to store
    :param remove_on_success: when True, delete the source file once a new
        object has been written
    :return: hash of the file
    """
    logger.info(f"Saving file {file_path} as object")
    path = Path(file_path)
    as_bytes = path.read_bytes()

    file_hash = get_file_hash(as_bytes)
    logger.info(f"File hash: {file_hash}")

    object_path = get_object_path(file_hash)
    if os.path.exists(object_path):
        logger.info(f"Object already exists: {object_path}")
        return file_hash

    # exist_ok=True instead of a separate existence check: another worker
    # storing the same content may create the folder between the check and the
    # makedirs call, which would otherwise raise FileExistsError.
    os.makedirs(os.path.dirname(object_path), exist_ok=True)

    logger.info(f"Saving object to: {object_path}")
    with open(object_path, "wb") as f:
        f.write(as_bytes)

    if remove_on_success:
        logger.info(f"Removing file: {file_path}")
        path.unlink()

    return file_hash
|
||||
@@ -1,83 +0,0 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from pathlib import Path
|
||||
|
||||
import pypandoc
|
||||
from PIL import Image
|
||||
from reportlab.lib.pagesizes import A4
|
||||
from reportlab.pdfgen import canvas
|
||||
|
||||
from tasks.common.converter_utils import generate_uuid_filename
|
||||
|
||||
|
||||
class BaseConverter(ABC):
    """Common base for converters that turn an input file into a PDF.

    On construction the output path is fixed to a generated UUID file name
    with a ``.pdf`` suffix inside ``output_dir``.
    """

    def __init__(self, input_path: str, output_dir: str = ".") -> None:
        self.input_path = Path(input_path)
        self.output_dir = Path(output_dir)
        pdf_name = f"{generate_uuid_filename()}.pdf"
        self.output_path = self.output_dir / pdf_name

    @abstractmethod
    def convert(self) -> str:
        """Convert the input file to PDF and return the output path."""
|
||||
|
||||
|
||||
class TextToPdfConverter(BaseConverter):
    """Converter that writes the lines of a UTF-8 text file onto A4 PDF pages."""

    def convert(self) -> str:
        """Draw each stripped input line on the canvas and return the PDF path.

        Lines start 50 points below the top of the page and advance by 15
        points; a new page is started once the cursor drops below 50 points.
        """
        pdf = canvas.Canvas(str(self.output_path), pagesize=A4)
        _, page_height = A4
        first_line_y = page_height - 50

        with open(self.input_path, "r", encoding="utf-8") as source:
            cursor_y = first_line_y
            for raw_line in source:
                pdf.drawString(50, cursor_y, raw_line.strip())
                cursor_y -= 15
                if cursor_y < 50:
                    # Bottom margin reached: continue at the top of a new page.
                    pdf.showPage()
                    cursor_y = first_line_y

        pdf.save()
        return str(self.output_path)
|
||||
|
||||
|
||||
class ImageToPdfConverter(BaseConverter):
    """Converter for image files to PDF."""

    def convert(self) -> str:
        """Convert the image to RGB, save it to the output path and return that path."""
        # The context manager closes the source image file once the output has
        # been written, instead of leaving the handle open until garbage
        # collection.
        with Image.open(self.input_path) as image:
            rgb_image = image.convert("RGB")
            rgb_image.save(self.output_path)
        return str(self.output_path)
|
||||
|
||||
|
||||
class WordToPdfConverter(BaseConverter):
    """Converter that hands Word files (.docx) to pypandoc for PDF output."""

    def convert(self) -> str:
        """Run the pypandoc conversion and return the output PDF path."""
        source = str(self.input_path)
        target = str(self.output_path)
        pypandoc.convert_file(source, "pdf", outputfile=target)
        return target
|
||||
|
||||
|
||||
# Placeholders for future extensions
|
||||
class HtmlToPdfConverter(BaseConverter):
    """Reserved slot for an HTML to PDF converter; no conversion is available yet."""

    def convert(self) -> str:
        """Always raise NotImplementedError."""
        message = "HTML to PDF conversion not implemented."
        raise NotImplementedError(message)
|
||||
|
||||
|
||||
class ExcelToPdfConverter(BaseConverter):
    """Reserved slot for an Excel to PDF converter; no conversion is available yet."""

    def convert(self) -> str:
        """Always raise NotImplementedError."""
        message = "Excel to PDF conversion not implemented."
        raise NotImplementedError(message)
|
||||
|
||||
|
||||
class MarkdownToPdfConverter(BaseConverter):
    """Reserved slot for a Markdown to PDF converter; no conversion is available yet."""

    def convert(self) -> str:
        """Always raise NotImplementedError."""
        message = "Markdown to PDF conversion not implemented."
        raise NotImplementedError(message)
|
||||
@@ -6,13 +6,14 @@ and update processing job statuses throughout the task lifecycle.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, Dict
|
||||
|
||||
from app.config import settings
|
||||
from app.database.connection import get_database
|
||||
from app.services.document_service import DocumentService
|
||||
from app.models.job import ProcessingStatus
|
||||
from app.services.document_service import DocumentService, DocumentAlreadyExists
|
||||
from app.services.job_service import JobService
|
||||
from tasks.common.converter_utils import convert_to_pdf
|
||||
from tasks.main import celery_app
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -25,7 +26,8 @@ def get_services():
|
||||
return document_service, job_service
|
||||
|
||||
|
||||
@celery_app.task(bind=True, autoretry_for=(Exception,), retry_kwargs={'max_retries': 3, 'countdown': 60})
|
||||
# @celery_app.task(bind=True, autoretry_for=(Exception,), retry_kwargs={'max_retries': 3, 'countdown': 60})
|
||||
@celery_app.task(bind=True)
|
||||
def process_document(self, filepath: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Process a document file and extract its content.
|
||||
@@ -46,30 +48,34 @@ def process_document(self, filepath: str) -> Dict[str, Any]:
|
||||
Exception: Any processing error (will trigger retry)
|
||||
"""
|
||||
task_id = self.request.id
|
||||
logger.info(f"Starting document processing task {task_id} for file: {filepath}")
|
||||
logger.info(f'Task {task_id} : Starting document processing for file: "{filepath}"')
|
||||
|
||||
# get services
|
||||
document_service, job_service = get_services()
|
||||
|
||||
job = None
|
||||
document = None
|
||||
try:
|
||||
# Step 1: Insert the document in DB
|
||||
# Step 1: Create the document and a new job record for the document
|
||||
document = document_service.create_document(filepath)
|
||||
logger.info(f"Job {task_id} created for document {document.id} with file path: {filepath}")
|
||||
|
||||
# Step 2: Create a new job record for the document
|
||||
job = job_service.create_job(task_id=task_id, document_id=document.id)
|
||||
|
||||
# Step 3: Mark job as started
|
||||
job_service.mark_job_as_started(job_id=job.id)
|
||||
logger.info(f"Job {task_id} marked as PROCESSING")
|
||||
logger.info(f'Task {task_id} : Created document "{document.id}". Started job "{job.id}"')
|
||||
|
||||
# Step 4: Create the pdf version of the document
|
||||
pdf_file_path = convert_to_pdf(filepath, settings.get_temp_folder())
|
||||
logger.info(f"Task {task_id} : Creating associated PDF")
|
||||
job_service.update_job_status(job_id=job.id, status=ProcessingStatus.SAVING_PDF)
|
||||
document_service.create_pdf(document.id)
|
||||
|
||||
logger.info(f"Task {task_id} : Creating thumbnail")
|
||||
job_service.update_job_status(job_id=job.id, status=ProcessingStatus.CREATING_THUMBNAIL)
|
||||
document_service.create_thumbnail(document.id)
|
||||
|
||||
# remove the file from the watch folder
|
||||
os.remove(filepath)
|
||||
|
||||
# Step x: Mark job as completed
|
||||
job_service.mark_job_as_completed(job_id=job.id)
|
||||
logger.info(f"Job {task_id} marked as COMPLETED")
|
||||
logger.info(f"Task {task_id} marked as COMPLETED")
|
||||
|
||||
return {
|
||||
"task_id": task_id,
|
||||
@@ -77,6 +83,19 @@ def process_document(self, filepath: str) -> Dict[str, Any]:
|
||||
"status": "completed",
|
||||
}
|
||||
|
||||
except DocumentAlreadyExists as e:
|
||||
logger.info(f"Task {task_id} completed: {str(e)}")
|
||||
if job is not None:
|
||||
job_service.mark_job_as_completed(job_id=job.id)
|
||||
logger.info(f"Job {task_id} marked as COMPLETED")
|
||||
|
||||
return {
|
||||
"task_id": task_id,
|
||||
"filepath": filepath,
|
||||
"status": "completed",
|
||||
"message": str(e),
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
error_message = f"Document processing failed: {str(e)}"
|
||||
logger.error(f"Task {task_id} failed: {error_message}")
|
||||
@@ -88,6 +107,11 @@ def process_document(self, filepath: str) -> Dict[str, Any]:
|
||||
logger.info(f"Job {task_id} marked as FAILED")
|
||||
else:
|
||||
logger.error(f"Failed to process {filepath}. error = {str(e)}")
|
||||
|
||||
if document is not None:
|
||||
document_service.move_to_errors(document.id, filepath)
|
||||
logger.info(f"Moved file {filepath} to errors/{document.id}")
|
||||
|
||||
except Exception as job_error:
|
||||
logger.error(f"Failed to update job status for task {task_id}: {str(job_error)}")
|
||||
|
||||
|
||||
@@ -7,10 +7,10 @@ import logging
|
||||
import os
|
||||
|
||||
from celery import Celery
|
||||
from celery.signals import worker_process_init
|
||||
|
||||
from app.config import settings
|
||||
|
||||
|
||||
# Environment variables
|
||||
REDIS_URL = settings.get_redis_url()
|
||||
MONGODB_URL = settings.get_mongodb_url()
|
||||
@@ -38,11 +38,16 @@ celery_app.conf.update(
|
||||
task_soft_time_limit=240, # 4 minutes
|
||||
)
|
||||
|
||||
|
||||
def global_init(**kwargs):
    """Log the worker start-up banner; keyword arguments are accepted but not used."""
    border = "*" * 45
    dashes = "--" * 5
    logger.info(border)
    logger.info(dashes + " Starting MyDocManager worker " + dashes)
    logger.info(border)
|
||||
|
||||
|
||||
global_init()
|
||||
|
||||
if __name__ == "__main__":
|
||||
# initialize temp folder if needed
|
||||
tmp_folder = settings.get_temp_folder()
|
||||
if not os.path.exists(tmp_folder):
|
||||
logger.info(f"Creating temporary folder: {tmp_folder}")
|
||||
os.makedirs(tmp_folder)
|
||||
|
||||
global_init()
|
||||
celery_app.start()
|
||||
|
||||
@@ -568,3 +568,137 @@ class TestFileTypeDetection:
|
||||
"""Test unsupported file type raises ValueError."""
|
||||
with pytest.raises(ValueError, match="Unsupported file type"):
|
||||
document_service._detect_file_type("/path/to/document.xyz")
|
||||
|
||||
|
||||
class TestCreatePdf:
    """Tests for create_pdf method."""

    @patch('app.services.document_service.convert_to_pdf')
    @patch('app.services.document_service.magic.from_buffer')
    def test_i_can_create_pdf_successfully(
        self,
        mock_magic,
        mock_convert_to_pdf,
        document_service,
        sample_file_bytes
    ):
        """Test creating PDF from an existing document."""
        # Setup
        mock_magic.return_value = "text/plain"

        # Create a document first
        created_doc = document_service.create_document(
            "/test/test.txt",
            sample_file_bytes,
            "utf-8"
        )

        # Mock the PDF conversion
        pdf_path = os.path.join(document_service.temp_folder, "converted.pdf")
        mock_convert_to_pdf.return_value = pdf_path

        # Write a sample PDF file that the conversion would create
        pdf_content = b"This is PDF content"
        os.makedirs(os.path.dirname(pdf_path), exist_ok=True)
        with open(pdf_path, "wb") as f:
            f.write(pdf_content)

        # Execute
        result = document_service.create_pdf(created_doc.id)

        # Verify
        assert result is True

        # The stored document must now reference the hash of the PDF content
        updated_doc = document_service.get_document_by_id(created_doc.id)
        assert updated_doc.pdf_file_hash is not None
        pdf_hash = document_service._calculate_file_hash(pdf_content)
        assert updated_doc.pdf_file_hash == pdf_hash

        # Verify convert_to_pdf was called with correct arguments
        doc_path = document_service.get_document_path(created_doc.file_hash)
        mock_convert_to_pdf.assert_called_once_with(doc_path, document_service.temp_folder)

        # Verify content exists on disk
        validate_file_saved(document_service, pdf_hash, pdf_content)

    @patch('app.services.document_service.convert_to_pdf')
    @patch('app.services.document_service.magic.from_buffer')
    def test_i_can_reuse_existing_pdf(
        self,
        mock_magic,
        mock_convert_to_pdf,
        document_service,
        sample_file_bytes
    ):
        """Test that if PDF already exists, it doesn't recreate it."""
        # Setup
        mock_magic.return_value = "text/plain"

        # Create a document first
        created_doc = document_service.create_document(
            "/test/test.txt",
            sample_file_bytes,
            "utf-8"
        )

        # Create a fake PDF file and update the document
        pdf_content = b"This is PDF content"
        pdf_hash = document_service._calculate_file_hash(pdf_content)
        document_service.save_content_if_needed(pdf_hash, pdf_content)
        document_service.update_document(created_doc.id, {"pdf_file_hash": pdf_hash})

        # Execute
        result = document_service.create_pdf(created_doc.id)

        # Verify
        assert result is True

        # Verify convert_to_pdf was NOT called
        mock_convert_to_pdf.assert_not_called()

    def test_i_cannot_create_pdf_for_nonexistent_document(
        self,
        document_service
    ):
        """Test behavior when document ID doesn't exist."""
        # Execute with random ObjectId
        result = document_service.create_pdf(ObjectId())

        # Verify
        assert result is False

    @patch('app.services.document_service.magic.from_buffer')
    def test_i_cannot_create_pdf_when_file_content_missing(
        self,
        mock_magic,
        document_service,
        sample_file_bytes
    ):
        """Test behavior when file content doesn't exist."""
        # Setup
        mock_magic.return_value = "text/plain"

        # Create a document
        created_doc = document_service.create_document(
            "/test/test.txt",
            sample_file_bytes,
            "utf-8"
        )

        # Simulate missing content by removing file
        file_path = document_service.get_document_path(created_doc.file_hash)
        os.remove(file_path)

        # Execute
        result = document_service.create_pdf(created_doc.id)

        # Verify
        assert result is False
|
||||
|
||||
@@ -417,6 +417,25 @@ class TestUpdateStatus:
|
||||
# Verify exception details
|
||||
assert exc_info.value.current_status == ProcessingStatus.FAILED
|
||||
assert exc_info.value.target_status == ProcessingStatus.FAILED
|
||||
|
||||
def test_i_can_update_job_status(
    self,
    job_service,
    sample_document_id,
    sample_task_id
):
    """Test that a started job can be moved to another processing status."""
    # Create and start a job
    created_job = job_service.create_job(sample_document_id, sample_task_id)
    job_service.mark_job_as_started(created_job.id)

    # Execute without error message
    result = job_service.update_job_status(created_job.id, ProcessingStatus.SAVING_OBJECT)

    # Verify status transition
    assert result is not None
    assert result.status == ProcessingStatus.SAVING_OBJECT
    assert result.error_message is None
|
||||
|
||||
|
||||
class TestDeleteJob:
|
||||
|
||||
@@ -4,7 +4,7 @@ from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from tasks.common.pdf_converter import TextToPdfConverter, ImageToPdfConverter, WordToPdfConverter
|
||||
from app.utils.pdf_converter import TextToPdfConverter, ImageToPdfConverter, WordToPdfConverter
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@@ -20,10 +20,10 @@ def test_i_can_convert_text_to_pdf(temp_dir):
|
||||
input_txt.write_text("Hello World!\nThis is a test.")
|
||||
|
||||
converter = TextToPdfConverter(str(input_txt), output_dir=temp_dir)
|
||||
output_pdf = converter.convert()
|
||||
converter.convert()
|
||||
|
||||
assert Path(output_pdf).exists()
|
||||
assert output_pdf.endswith(".pdf")
|
||||
assert Path(converter.output_path).exists()
|
||||
assert str(converter.output_path).endswith(".pdf")
|
||||
|
||||
|
||||
def test_i_can_convert_image_to_pdf(temp_dir):
|
||||
@@ -34,10 +34,10 @@ def test_i_can_convert_image_to_pdf(temp_dir):
|
||||
image.save(input_img)
|
||||
|
||||
converter = ImageToPdfConverter(str(input_img), output_dir=temp_dir)
|
||||
output_pdf = converter.convert()
|
||||
converter.convert()
|
||||
|
||||
assert Path(output_pdf).exists()
|
||||
assert output_pdf.endswith(".pdf")
|
||||
assert Path(converter.output_path).exists()
|
||||
assert str(converter.output_path).endswith(".pdf")
|
||||
|
||||
|
||||
def test_i_can_convert_word_to_pdf(temp_dir):
|
||||
@@ -49,7 +49,7 @@ def test_i_can_convert_word_to_pdf(temp_dir):
|
||||
doc.save(input_docx)
|
||||
|
||||
converter = WordToPdfConverter(str(input_docx), output_dir=temp_dir)
|
||||
output_pdf = converter.convert()
|
||||
converter.convert()
|
||||
|
||||
assert Path(output_pdf).exists()
|
||||
assert output_pdf.endswith(".pdf")
|
||||
assert Path(converter.output_path).exists()
|
||||
assert str(converter.output_path).endswith(".pdf")
|
||||
Reference in New Issue
Block a user