Implemented default pipeline

This commit is contained in:
2025-09-26 22:08:39 +02:00
parent f1b551d243
commit 4de732b0ae
56 changed files with 4534 additions and 2837 deletions

View File

@@ -1,203 +1,169 @@
"""
FastAPI application for MyDocManager file processor service.
FastAPI application with integrated FileWatcher for document processing.
This service provides API endpoints for health checks and task dispatching.
This module provides the main FastAPI application with:
- JWT authentication
- User management APIs
- Real-time file monitoring via FileWatcher
- Document processing via Celery tasks
"""
import logging
import os
from contextlib import asynccontextmanager
from fastapi import FastAPI, HTTPException, Depends
from pydantic import BaseModel
import redis
from celery import Celery
from typing import AsyncGenerator
from app.database.connection import test_database_connection, get_database
from app.database.repositories.user_repository import UserRepository
from app.models.user import UserCreate
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from app.api.routes.auth import router as auth_router
from app.api.routes.users import router as users_router
from app.config import settings
from app.database.connection import get_database
from app.file_watcher import create_file_watcher, FileWatcher
from app.services.document_service import DocumentService
from app.services.init_service import InitializationService
from app.services.job_service import JobService
from app.services.user_service import UserService
# Configure logging: INFO level on the root logger, plus a logger named after this module
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Global file watcher instance.
# Starts as None; lifespan() rebinds it (via `global file_watcher`) to the object
# returned by create_file_watcher(), and the /health and /watcher/status handlers
# read it, treating a falsy value as "not initialized".
# NOTE(review): the annotation does not admit None even though None is the
# initial value -- consider an optional type once the typing imports allow it.
file_watcher: FileWatcher = None
@asynccontextmanager
async def lifespan(app: FastAPI):
async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
"""
Application lifespan manager for startup and shutdown tasks.
Handles initialization tasks that need to run when the application starts,
including admin user creation and other setup procedures.
FastAPI lifespan context manager.
Handles application startup and shutdown events including:
- Database connection
- Default admin user creation
- FileWatcher startup/shutdown
"""
# Startup tasks
global file_watcher
# Startup
logger.info("Starting MyDocManager application...")
try:
# Initialize database connection
database = get_database()
logger.info("Database connection established")
# Initialize repositories and services
user_repository = UserRepository(database)
user_service = UserService(user_repository)
document_service = DocumentService(database=database, objects_folder=settings.get_objects_folder())
job_service = JobService(database=database)
user_service = UserService(database=database)
logger.info("Service created")
# Create default admin user
init_service = InitializationService(user_service)
init_service.initialize_application()
logger.info("Default admin user initialization completed")
# Run initialization tasks
initialization_result = init_service.initialize_application()
# Create and start file watcher
file_watcher = create_file_watcher(
watch_directory=settings.watch_directory(),
document_service=document_service,
job_service=job_service
)
file_watcher.start()
logger.info(f"FileWatcher started for directory: {settings.watch_directory()}")
if initialization_result["initialization_success"]:
logger.info("Application startup completed successfully")
if initialization_result["admin_user_created"]:
logger.info("Default admin user was created during startup")
else:
logger.error("Application startup completed with errors:")
for error in initialization_result["errors"]:
logger.error(f" - {error}")
logger.info("Application startup completed successfully")
yield
except Exception as e:
logger.error(f"Critical error during application startup: {str(e)}")
# You might want to decide if the app should continue or exit here
# For now, we log the error but continue
logger.error(f"Application startup failed: {str(e)}")
raise
yield # Application is running
# Shutdown tasks (if needed)
logger.info("Shutting down MyDocManager application...")
finally:
# Shutdown
logger.info("Shutting down MyDocManager application...")
if file_watcher and file_watcher.is_running():
file_watcher.stop()
logger.info("FileWatcher stopped")
logger.info("Application shutdown completed")
# Initialize FastAPI app
# Create FastAPI application
app = FastAPI(
title="MyDocManager File Processor",
description="File processing and task dispatch service",
version="1.0.0",
title="MyDocManager",
description="Real-time document processing application with authentication",
version="0.1.0",
lifespan=lifespan
)
# Connection URLs are read from the environment, with localhost defaults
REDIS_URL = os.environ.get("REDIS_URL", "redis://localhost:6379/0")
MONGODB_URL = os.environ.get("MONGODB_URL", "mongodb://localhost:27017")

# Build the Redis client from REDIS_URL; on any failure keep the module
# importable by leaving redis_client as None and printing a warning.
try:
    redis_client = redis.from_url(REDIS_URL)
except Exception as exc:
    redis_client = None
    print(f"Warning: Could not connect to Redis: {exc}")
# Initialize Celery
celery_app = Celery(
"file_processor",
broker=REDIS_URL,
backend=REDIS_URL
# Configure CORS: a single allowed origin (listed below), credentials enabled,
# and every HTTP method and request header permitted for that origin.
# NOTE(review): the origin is hard-coded; presumably this is the local dev
# frontend -- confirm whether it should come from `settings` instead.
app.add_middleware(
CORSMiddleware,
allow_origins=["http://localhost:5173"], # React frontend
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Include routers: authentication under /auth, user management under /users
app.include_router(auth_router, prefix="/auth", tags=["Authentication"])
app.include_router(users_router, prefix="/users", tags=["User Management"])
# TODO: the document and job routers below are not wired in yet; neither
# documents_router nor jobs_router is imported in the visible part of this module.
# app.include_router(documents_router, prefix="/documents", tags=["Documents"])
# app.include_router(jobs_router, prefix="/jobs", tags=["Processing Jobs"])
# Pydantic models
class TestTaskRequest(BaseModel):
    """Request body for the POST /test-task endpoint."""

    # Text passed as the single positional argument of the dispatched task
    message: str
def get_user_service() -> UserService:
    """
    Build a UserService for use as a FastAPI dependency.

    Every call obtains the database through get_database(), wraps it in a
    UserRepository and returns a new UserService backed by that repository.

    NOTE: no connection lifecycle management happens here; a real deployment
    is expected to replace this with properly managed database access.
    """
    return UserService(UserRepository(get_database()))
# Your API routes would use the service like this:
@app.post("/api/users")
async def create_user(
    user_data: UserCreate,
    user_service: UserService = Depends(get_user_service),
):
    """
    Create a user through the user service.

    Args:
        user_data: User-creation payload.
        user_service: Service resolved via the get_user_service dependency.

    Returns:
        Whatever user_service.create_user() returns for the payload.
    """
    created_user = user_service.create_user(user_data)
    return created_user
@app.get("/health")
async def health_check():
"""
Health check endpoint.
Returns:
dict: Service health status with dependencies
Dictionary containing application health status
"""
health_status = {
return {
"status": "healthy",
"service": "file-processor",
"dependencies": {
"redis": "unknown",
"mongodb": "unknown"
},
"service": "MyDocManager",
"version": "1.0.0",
"file_watcher_running": file_watcher.is_running() if file_watcher else False
}
# Check Redis connection
if redis_client:
try:
redis_client.ping()
health_status["dependencies"]["redis"] = "connected"
except Exception:
health_status["dependencies"]["redis"] = "disconnected"
health_status["status"] = "degraded"
# check MongoDB connection
if test_database_connection():
health_status["dependencies"]["mongodb"] = "connected"
else:
health_status["dependencies"]["mongodb"] = "disconnected"
return health_status
@app.post("/test-task")
async def dispatch_test_task(request: TestTaskRequest):
    """
    Dispatch a test task to the Celery worker.

    Args:
        request: Test task request containing the message to forward.

    Returns:
        dict: Dispatch status, the id of the created task and a
        confirmation message echoing the request message.

    Raises:
        HTTPException: Status 500 if dispatching fails. The underlying
            exception is chained as the cause so it stays in the traceback.
    """
    try:
        # The task is referenced by its registered name, "main.test_task"
        task = celery_app.send_task(
            "main.test_task",
            args=[request.message],
        )

        # Built inside the try on purpose: a failure while reading task.id
        # is reported as a dispatch failure too.
        return {
            "status": "dispatched",
            "task_id": task.id,
            "message": f"Test task dispatched with message: {request.message}"
        }

    except Exception as e:
        # Explicit chaining keeps the original error attached as __cause__
        raise HTTPException(
            status_code=500,
            detail=f"Failed to dispatch task: {str(e)}"
        ) from e
@app.get("/")
async def root():
"""
Root endpoint.
Root endpoint with basic application information.
Returns:
dict: Basic service information
Dictionary containing welcome message and available endpoints
"""
return {
"service": "MyDocManager File Processor",
"version": "1.0.0",
"status": "running"
"message": "Welcome to MyDocManager",
"description": "Real-time document processing application",
"docs": "/docs",
"health": "/health"
}
@app.get("/watcher/status")
async def watcher_status():
    """
    Report the state of the module-level file watcher.

    Returns:
        Dictionary with "status" and "running" keys. When a watcher has been
        set, it also carries "watch_directory" (as a string) and "recursive".
    """
    if file_watcher:
        is_running = file_watcher.is_running()
        directory = str(file_watcher.watch_directory)
        return {
            "status": "initialized",
            "running": is_running,
            "watch_directory": directory,
            "recursive": file_watcher.recursive,
        }

    # No watcher has been assigned yet (or it is falsy): report a stub status
    return {"status": "not_initialized", "running": False}