From 10650420efc926c0512334208c24e7d0d9f4ec18 Mon Sep 17 00:00:00 2001 From: Kodjo Sossouvi Date: Mon, 15 Sep 2025 23:21:09 +0200 Subject: [PATCH] First commit. Docker compose is working --- .gitignore | 216 +++++++++++++++++++++++ Readme.md | 255 ++++++++++++++++++++++++++++ docker-compose.yml | 73 ++++++++ main.py | 7 + requirements.txt | 33 ++++ src/__init__.py | 0 src/file-processor/Dockerfile | 17 ++ src/file-processor/__init__.py | 0 src/file-processor/app/__init__.py | 0 src/file-processor/app/main.py | 120 +++++++++++++ src/file-processor/requirements.txt | 6 + src/worker/Dockerfile | 14 ++ src/worker/__init__.py | 0 src/worker/requirements.txt | 4 + src/worker/tasks/__init__.py | 0 src/worker/tasks/main.py | 113 ++++++++++++ tests/__init__.py | 0 17 files changed, 858 insertions(+) create mode 100644 .gitignore create mode 100644 Readme.md create mode 100644 docker-compose.yml create mode 100644 main.py create mode 100644 requirements.txt create mode 100644 src/__init__.py create mode 100644 src/file-processor/Dockerfile create mode 100644 src/file-processor/__init__.py create mode 100644 src/file-processor/app/__init__.py create mode 100644 src/file-processor/app/main.py create mode 100644 src/file-processor/requirements.txt create mode 100644 src/worker/Dockerfile create mode 100644 src/worker/__init__.py create mode 100644 src/worker/requirements.txt create mode 100644 src/worker/tasks/__init__.py create mode 100644 src/worker/tasks/main.py create mode 100644 tests/__init__.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..70054a0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,216 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[codz] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are 
written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py.cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +# Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +# poetry.lock +# poetry.toml + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
+# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python. +# https://pdm-project.org/en/latest/usage/project/#working-with-version-control +# pdm.lock +# pdm.toml +.pdm-python +.pdm-build/ + +# pixi +# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control. +# pixi.lock +# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one +# in the .venv directory. It is recommended not to include this directory in version control. +.pixi + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# Redis +*.rdb +*.aof +*.pid + +# RabbitMQ +mnesia/ +rabbitmq/ +rabbitmq-data/ + +# ActiveMQ +activemq-data/ + +# SageMath parsed files +*.sage.py + +# Environments +.env +.envrc +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +.idea/ + +# Abstra +# Abstra is an AI-powered process automation framework. +# Ignore directories containing user credentials, local state, and settings. 
+# Learn more at https://abstra.io/docs +.abstra/ + +# Visual Studio Code +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore +# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore +# and can be added to the global gitignore or merged into this file. However, if you prefer, +# you could uncomment the following to ignore the entire vscode folder +# .vscode/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc + +# Marimo +marimo/_static/ +marimo/_lsp/ +__marimo__/ + +# Streamlit +.streamlit/secrets.toml \ No newline at end of file diff --git a/Readme.md b/Readme.md new file mode 100644 index 0000000..0d5f935 --- /dev/null +++ b/Readme.md @@ -0,0 +1,255 @@ +# MyDocManager + +## Overview + +MyDocManager is a real-time document processing application that automatically detects files in a monitored directory, processes them asynchronously, and stores the results in a database. The application uses a modern microservices architecture with Redis for task queuing and MongoDB for data persistence. + +## Architecture + +### Technology Stack +- **Backend API**: FastAPI (Python 3.12) +- **Task Processing**: Celery with Redis broker +- **Document Processing**: EasyOCR, PyMuPDF, python-docx, pdfplumber +- **Database**: MongoDB +- **Frontend**: React +- **Containerization**: Docker & Docker Compose +- **File Monitoring**: Python watchdog library + +### Services Architecture + ┌─────────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ + │ Frontend │ │ file- │ │ Redis │ │ Worker │ │ MongoDB │ + │ (React) │◄──►│ processor │───►│ (Broker) │◄──►│ (Celery) │───►│ (Results) │ + │ │ │ (FastAPI + │ │ │ │ │ │ │ + │ │ │ watchdog) │ │ │ │ │ │ │ + └─────────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ + +### Docker Services +1. **file-processor**: FastAPI + real-time file monitoring + Celery task dispatch +2. 
**worker**: Celery workers for document processing (OCR, text extraction) +3. **redis**: Message broker for Celery tasks +4. **mongodb**: Final database for processing results +5. **frontend**: React interface for monitoring and file access + +## Data Flow + +1. **File Detection**: Watchdog monitors target directory in real-time +2. **Task Creation**: FastAPI creates Celery task for each detected file +3. **Task Processing**: Worker processes document (OCR, text extraction) +4. **Result Storage**: Processed data stored in MongoDB +5. **Monitoring**: React frontend displays processing status and results + +## Document Processing Capabilities + +### Supported File Types +- **PDF**: Direct text extraction + OCR for scanned documents +- **Word Documents**: .docx text extraction +- **Images**: OCR text recognition (JPG, PNG, etc.) + +### Processing Libraries +- **EasyOCR**: Modern OCR engine (80+ languages, deep learning-based) +- **PyMuPDF**: PDF text extraction and manipulation +- **python-docx**: Word document processing +- **pdfplumber**: Advanced PDF text extraction + +## Development Environment + +### Container-Based Development +The application is designed for container-based development with hot-reload capabilities: +- Source code mounted as volumes for real-time updates +- All services orchestrated via Docker Compose +- Development and production parity + +### Key Features +- **Real-time Processing**: Immediate file detection and processing +- **Horizontal Scaling**: Multiple workers can be added easily +- **Fault Tolerance**: Celery provides automatic retry mechanisms +- **Monitoring**: Built-in task status tracking +- **Hot Reload**: Development changes reflected instantly in containers + +### Docker Services +1. **file-processor**: FastAPI + real-time file monitoring + Celery task dispatch +2. **worker**: Celery workers for document processing (OCR, text extraction) +3. **redis**: Message broker for Celery tasks +4. 
**mongodb**: Final database for processing results +5. **frontend**: React interface for monitoring and file access + +## Project Structure (To be implemented) + +MyDocManager/ +├── docker-compose.yml +├── src/ +│ ├── file-processor/ +│ │ ├── Dockerfile +│ │ ├── requirements.txt +│ │ ├── app/ +│ │ │ ├── main.py +│ │ │ ├── file_watcher.py +│ │ │ ├── celery_app.py +│ │ │ └── api/ +│ ├── worker/ +│ │ ├── Dockerfile +│ │ ├── requirements.txt +│ │ └── tasks/ +│ └── frontend/ +│ ├── Dockerfile +│ ├── package.json +│ └── src/ +├── tests/ +│ ├── file-processor/ +│ └── worker/ +├── volumes/ +│ └── watched_files/ +└── README.md + + +## Docker Commands Reference + +### Initial Setup & Build + +```bash +# Build and start all services (first time) +docker-compose up --build + +# Build and start in background +docker-compose up --build -d + +# Build specific service +docker-compose build file-processor +docker-compose build worker +``` + +### Development Workflow + +```bash +# Start all services +docker-compose up + +# Start in background (detached mode) +docker-compose up -d + +# Stop all services +docker-compose down + +# Stop and remove volumes (⚠️ deletes MongoDB data) +docker-compose down -v + +# Restart specific service +docker-compose restart file-processor +docker-compose restart worker +docker-compose restart redis +docker-compose restart mongodb +``` + +### Monitoring & Debugging + +```bash +# View logs of all services +docker-compose logs + +# View logs of specific service +docker-compose logs file-processor +docker-compose logs worker +docker-compose logs redis +docker-compose logs mongodb + +# Follow logs in real-time +docker-compose logs -f +docker-compose logs -f worker + +# View running containers +docker-compose ps + +# Execute command in running container +docker-compose exec file-processor bash +docker-compose exec worker bash +docker-compose exec mongodb mongosh +``` + +### Service Management + +```bash +# Start only specific services +docker-compose up redis 
mongodb file-processor + +# Stop specific service +docker-compose stop worker +docker-compose stop file-processor + +# Remove stopped containers +docker-compose rm + +# Scale workers (multiple instances) +docker-compose up --scale worker=3 +``` + +### Hot-Reload Configuration + +- **file-processor**: Hot-reload enabled via `--reload` flag + - Code changes in `src/file-processor/app/` automatically restart FastAPI +- **worker**: No hot-reload (manual restart required for stability) + - Code changes in `src/worker/tasks/` require: `docker-compose restart worker` + +### Useful Service URLs + +- **FastAPI API**: http://localhost:8000 +- **FastAPI Docs**: http://localhost:8000/docs +- **Health Check**: http://localhost:8000/health +- **Redis**: localhost:6379 +- **MongoDB**: localhost:27017 + +### Testing Commands + +```bash +# Test FastAPI health +curl http://localhost:8000/health + +# Test Celery task dispatch +curl -X POST http://localhost:8000/test-task \ + -H "Content-Type: application/json" \ + -d '{"message": "Hello from test!"}' + +# Monitor Celery tasks +docker-compose logs -f worker +``` + + +## Key Implementation Notes + +### Python Standards +- **Style**: PEP 8 compliance +- **Documentation**: Google/NumPy docstring format +- **Naming**: snake_case for variables and functions +- **Testing**: pytest with test_i_can_xxx / test_i_cannot_xxx patterns + +### Dependencies Management +- **Package Manager**: pip (standard) +- **External Dependencies**: Listed in each service's requirements.txt +- **Standard Library First**: Prefer standard library when possible + +### Testing Strategy +- All code must be testable +- Unit tests for each processing function +- Integration tests for file processing workflow +- Tests validated before implementation + +### Critical Architecture Decisions Made +1. **Option Selected**: Single FastAPI service handles both API and file watching +2. **Celery with Redis**: Chosen over other async patterns for scalability +3. 
**EasyOCR Preferred**: Selected over Tesseract for modern OCR needs +4. **Container Development**: Hot-reload setup required for development workflow + +### Development Process Requirements +1. **Collaborative Validation**: All options must be explained before coding +2. **Test-First Approach**: Test cases defined and validated before implementation +3. **Incremental Development**: Start simple, extend functionality progressively +4. **Error Handling**: Clear problem explanation required before proposing fixes + +### Next Implementation Steps +1. Create docker-compose.yml with all services +2. Implement basic FastAPI service structure +3. Add watchdog file monitoring +4. Create Celery task structure +5. Implement document processing tasks +6. Build React monitoring interface + + \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..cbc28bc --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,73 @@ +version: '3.8' + +services: + # Redis - Message broker for Celery + redis: + image: redis:8-alpine + container_name: mydocmanager-redis + ports: + - "6379:6379" + networks: + - mydocmanager-network + + # MongoDB - Final database for results + mongodb: + image: mongo:7 + container_name: mydocmanager-mongodb + ports: + - "27017:27017" + environment: + MONGO_INITDB_ROOT_USERNAME: admin + MONGO_INITDB_ROOT_PASSWORD: password123 + MONGO_INITDB_DATABASE: mydocmanager + volumes: + - mongodb-data:/data/db + networks: + - mydocmanager-network + + # File Processor - FastAPI + file monitoring + Celery task dispatch + file-processor: + build: + context: ./src/file-processor + dockerfile: Dockerfile + container_name: mydocmanager-file-processor + ports: + - "8000:8000" + environment: + - REDIS_URL=redis://redis:6379/0 + - MONGODB_URL=mongodb://admin:password123@mongodb:27017/mydocmanager?authSource=admin + volumes: + - ./src/file-processor/app:/app + - ./volumes/watched_files:/watched_files + depends_on: + - redis + - 
mongodb + networks: + - mydocmanager-network + command: uvicorn main:app --host 0.0.0.0 --port 8000 --reload + + # Worker - Celery workers for document processing + worker: + build: + context: ./src/worker + dockerfile: Dockerfile + container_name: mydocmanager-worker + environment: + - REDIS_URL=redis://redis:6379/0 + - MONGODB_URL=mongodb://admin:password123@mongodb:27017/mydocmanager?authSource=admin + volumes: + - ./src/worker/tasks:/app + - ./volumes/watched_files:/watched_files + depends_on: + - redis + - mongodb + networks: + - mydocmanager-network + command: celery -A main worker --loglevel=info + +volumes: + mongodb-data: + +networks: + mydocmanager-network: + driver: bridge \ No newline at end of file diff --git a/main.py b/main.py new file mode 100644 index 0000000..0dbfd0e --- /dev/null +++ b/main.py @@ -0,0 +1,7 @@ + +def main(): + print("Hello word !") + + +if __name__ == "__main__": + main() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..b2f9cab --- /dev/null +++ b/requirements.txt @@ -0,0 +1,33 @@ +amqp==5.3.1 +annotated-types==0.7.0 +anyio==4.10.0 +billiard==4.2.1 +celery==5.5.3 +click==8.2.1 +click-didyoumean==0.3.1 +click-plugins==1.1.1.2 +click-repl==0.3.0 +fastapi==0.116.1 +h11==0.16.0 +httptools==0.6.4 +idna==3.10 +kombu==5.5.4 +packaging==25.0 +prompt_toolkit==3.0.52 +pydantic==2.11.9 +pydantic_core==2.33.2 +python-dateutil==2.9.0.post0 +python-dotenv==1.1.1 +PyYAML==6.0.2 +six==1.17.0 +sniffio==1.3.1 +starlette==0.47.3 +typing-inspection==0.4.1 +typing_extensions==4.15.0 +tzdata==2025.2 +uvicorn==0.35.0 +uvloop==0.21.0 +vine==5.1.0 +watchfiles==1.1.0 +wcwidth==0.2.13 +websockets==15.0.1 diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/file-processor/Dockerfile b/src/file-processor/Dockerfile new file mode 100644 index 0000000..86d00c5 --- /dev/null +++ b/src/file-processor/Dockerfile @@ -0,0 +1,17 @@ +FROM python:3.12-slim + +# Set working 
directory +WORKDIR /app + +# Copy requirements and install dependencies +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy application code +COPY app/ . + +# Expose port +EXPOSE 8000 + +# Command will be overridden by docker-compose +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/src/file-processor/__init__.py b/src/file-processor/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/file-processor/app/__init__.py b/src/file-processor/app/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/file-processor/app/main.py b/src/file-processor/app/main.py new file mode 100644 index 0000000..62f5ec5 --- /dev/null +++ b/src/file-processor/app/main.py @@ -0,0 +1,120 @@ +""" +FastAPI application for MyDocManager file processor service. + +This service provides API endpoints for health checks and task dispatching. +""" + +import os +from fastapi import FastAPI, HTTPException +from pydantic import BaseModel +import redis +from celery import Celery + +# Initialize FastAPI app +app = FastAPI( + title="MyDocManager File Processor", + description="File processing and task dispatch service", + version="1.0.0" +) + +# Environment variables +REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/0") +MONGODB_URL = os.getenv("MONGODB_URL", "mongodb://localhost:27017") + +# Initialize Redis client +try: + redis_client = redis.from_url(REDIS_URL) +except Exception as e: + redis_client = None + print(f"Warning: Could not connect to Redis: {e}") + +# Initialize Celery +celery_app = Celery( + "file_processor", + broker=REDIS_URL, + backend=REDIS_URL +) + + +# Pydantic models +class TestTaskRequest(BaseModel): + """Request model for test task.""" + message: str + + +@app.get("/health") +async def health_check(): + """ + Health check endpoint. 
+ + Returns: + dict: Service health status with dependencies + """ + health_status = { + "status": "healthy", + "service": "file-processor", + "dependencies": { + "redis": "unknown", + "mongodb": "unknown" + }, + } + + # Check Redis connection + if redis_client: + try: + redis_client.ping() + health_status["dependencies"]["redis"] = "connected" + except Exception: + health_status["dependencies"]["redis"] = "disconnected" + health_status["status"] = "degraded" + + return health_status + + +@app.post("/test-task") +async def dispatch_test_task(request: TestTaskRequest): + """ + Dispatch a test task to Celery worker. + + Args: + request: Test task request containing message + + Returns: + dict: Task dispatch information + + Raises: + HTTPException: If task dispatch fails + """ + try: + # Send task to worker + task = celery_app.send_task( + "main.test_task", + args=[request.message] + ) + + return { + "status": "dispatched", + "task_id": task.id, + "message": f"Test task dispatched with message: {request.message}" + } + + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"Failed to dispatch task: {str(e)}" + ) + + +@app.get("/") +async def root(): + """ + Root endpoint. 
+ + Returns: + dict: Basic service information + """ + return { + "service": "MyDocManager File Processor", + "version": "1.0.0", + "status": "running" + } \ No newline at end of file diff --git a/src/file-processor/requirements.txt b/src/file-processor/requirements.txt new file mode 100644 index 0000000..768ae61 --- /dev/null +++ b/src/file-processor/requirements.txt @@ -0,0 +1,6 @@ +fastapi==0.116.1 +uvicorn==0.35.0 +celery==5.5.3 +redis==6.4.0 +pymongo==4.15.0 +pydantic==2.11.9 \ No newline at end of file diff --git a/src/worker/Dockerfile b/src/worker/Dockerfile new file mode 100644 index 0000000..8723a3e --- /dev/null +++ b/src/worker/Dockerfile @@ -0,0 +1,14 @@ +FROM python:3.12-slim + +# Set working directory +WORKDIR /app + +# Copy requirements and install dependencies +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy application code +COPY tasks/ . + +# Command will be overridden by docker-compose +CMD ["celery", "-A", "main", "worker", "--loglevel=info"] \ No newline at end of file diff --git a/src/worker/__init__.py b/src/worker/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/worker/requirements.txt b/src/worker/requirements.txt new file mode 100644 index 0000000..af2f3cd --- /dev/null +++ b/src/worker/requirements.txt @@ -0,0 +1,4 @@ + +celery==5.5.3 +redis==6.4.0 +pymongo==4.15.0 \ No newline at end of file diff --git a/src/worker/tasks/__init__.py b/src/worker/tasks/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/worker/tasks/main.py b/src/worker/tasks/main.py new file mode 100644 index 0000000..63b2c5d --- /dev/null +++ b/src/worker/tasks/main.py @@ -0,0 +1,113 @@ +""" +Celery worker for MyDocManager document processing tasks. + +This module contains all Celery tasks for processing documents. 
+""" + +import os +import time +from celery import Celery + +# Environment variables +REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/0") +MONGODB_URL = os.getenv("MONGODB_URL", "mongodb://localhost:27017") + +# Initialize Celery app +app = Celery( + "mydocmanager_worker", + broker=REDIS_URL, + backend=REDIS_URL +) + +# Celery configuration +app.conf.update( + task_serializer="json", + accept_content=["json"], + result_serializer="json", + timezone="UTC", + enable_utc=True, + task_track_started=True, + task_time_limit=300, # 5 minutes + task_soft_time_limit=240, # 4 minutes +) + + +@app.task(bind=True) +def test_task(self, message: str): + """ + Test task for validating worker functionality. + + Args: + message: Test message to process + + Returns: + dict: Task result with processing information + """ + try: + print(f"[WORKER] Starting test task with message: {message}") + + # Simulate some work + for i in range(5): + print(f"[WORKER] Processing step {i + 1}/5...") + time.sleep(1) + + # Update task progress + self.update_state( + state="PROGRESS", + meta={ + "current": i + 1, + "total": 5, + "message": f"Processing step {i + 1}" + } + ) + + result = { + "status": "completed", + "message": f"Successfully processed: {message}", + "processed_at": time.time(), + "worker_id": self.request.id + } + + print(f"[WORKER] Test task completed successfully: {result}") + return result + + except Exception as exc: + print(f"[WORKER] Test task failed: {str(exc)}") + raise self.retry(exc=exc, countdown=60, max_retries=3) + + +@app.task(bind=True) +def process_document_task(self, file_path: str): + """ + Placeholder task for document processing. 
+ + Args: + file_path: Path to the document to process + + Returns: + dict: Processing result + """ + try: + print(f"[WORKER] Starting document processing for: {file_path}") + + # Placeholder for document processing logic + time.sleep(2) # Simulate processing time + + result = { + "status": "completed", + "file_path": file_path, + "processed_at": time.time(), + "content": f"Placeholder content for {file_path}", + "worker_id": self.request.id + } + + print(f"[WORKER] Document processing completed: {file_path}") + return result + + except Exception as exc: + print(f"[WORKER] Document processing failed for {file_path}: {str(exc)}") + raise self.retry(exc=exc, countdown=60, max_retries=3) + + +if __name__ == "__main__": + app.start() \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29