First commit. Docker compose is working
This commit is contained in:
216
.gitignore
vendored
Normal file
216
.gitignore
vendored
Normal file
@@ -0,0 +1,216 @@
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[codz]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py.cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
# Pipfile.lock
|
||||
|
||||
# UV
|
||||
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# uv.lock
|
||||
|
||||
# poetry
|
||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||
# poetry.lock
|
||||
# poetry.toml
|
||||
|
||||
# pdm
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
||||
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
||||
# pdm.lock
|
||||
# pdm.toml
|
||||
.pdm-python
|
||||
.pdm-build/
|
||||
|
||||
# pixi
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
||||
# pixi.lock
|
||||
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
||||
# in the .venv directory. It is recommended not to include this directory in version control.
|
||||
.pixi
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# Redis
|
||||
*.rdb
|
||||
*.aof
|
||||
*.pid
|
||||
|
||||
# RabbitMQ
|
||||
mnesia/
|
||||
rabbitmq/
|
||||
rabbitmq-data/
|
||||
|
||||
# ActiveMQ
|
||||
activemq-data/
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.envrc
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
# PyCharm
|
||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
.idea/
|
||||
|
||||
# Abstra
|
||||
# Abstra is an AI-powered process automation framework.
|
||||
# Ignore directories containing user credentials, local state, and settings.
|
||||
# Learn more at https://abstra.io/docs
|
||||
.abstra/
|
||||
|
||||
# Visual Studio Code
|
||||
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
||||
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
||||
# you could uncomment the following to ignore the entire vscode folder
|
||||
# .vscode/
|
||||
|
||||
# Ruff stuff:
|
||||
.ruff_cache/
|
||||
|
||||
# PyPI configuration file
|
||||
.pypirc
|
||||
|
||||
# Marimo
|
||||
marimo/_static/
|
||||
marimo/_lsp/
|
||||
__marimo__/
|
||||
|
||||
# Streamlit
|
||||
.streamlit/secrets.toml
|
||||
255
Readme.md
Normal file
255
Readme.md
Normal file
@@ -0,0 +1,255 @@
|
||||
# MyDocManager
|
||||
|
||||
## Overview
|
||||
|
||||
MyDocManager is a real-time document processing application that automatically detects files in a monitored directory, processes them asynchronously, and stores the results in a database. The application uses a modern microservices architecture with Redis for task queuing and MongoDB for data persistence.
|
||||
|
||||
## Architecture
|
||||
|
||||
### Technology Stack
|
||||
- **Backend API**: FastAPI (Python 3.12)
|
||||
- **Task Processing**: Celery with Redis broker
|
||||
- **Document Processing**: EasyOCR, PyMuPDF, python-docx, pdfplumber
|
||||
- **Database**: MongoDB
|
||||
- **Frontend**: React
|
||||
- **Containerization**: Docker & Docker Compose
|
||||
- **File Monitoring**: Python watchdog library
|
||||
|
||||
### Services Architecture
|
||||
┌─────────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐
|
||||
│ Frontend │ │ file- │ │ Redis │ │ Worker │ │ MongoDB │
|
||||
│ (React) │◄──►│ processor │───►│ (Broker) │◄──►│ (Celery) │───►│ (Results) │
|
||||
│ │ │ (FastAPI + │ │ │ │ │ │ │
|
||||
│ │ │ watchdog) │ │ │ │ │ │ │
|
||||
└─────────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘
|
||||
|
||||
### Docker Services
|
||||
1. **file-processor**: FastAPI + real-time file monitoring + Celery task dispatch
|
||||
2. **worker**: Celery workers for document processing (OCR, text extraction)
|
||||
3. **redis**: Message broker for Celery tasks
|
||||
4. **mongodb**: Final database for processing results
|
||||
5. **frontend**: React interface for monitoring and file access
|
||||
|
||||
## Data Flow
|
||||
|
||||
1. **File Detection**: Watchdog monitors target directory in real-time
|
||||
2. **Task Creation**: FastAPI creates Celery task for each detected file
|
||||
3. **Task Processing**: Worker processes document (OCR, text extraction)
|
||||
4. **Result Storage**: Processed data stored in MongoDB
|
||||
5. **Monitoring**: React frontend displays processing status and results
|
||||
|
||||
## Document Processing Capabilities
|
||||
|
||||
### Supported File Types
|
||||
- **PDF**: Direct text extraction + OCR for scanned documents
|
||||
- **Word Documents**: .docx text extraction
|
||||
- **Images**: OCR text recognition (JPG, PNG, etc.)
|
||||
|
||||
### Processing Libraries
|
||||
- **EasyOCR**: Modern OCR engine (80+ languages, deep learning-based)
|
||||
- **PyMuPDF**: PDF text extraction and manipulation
|
||||
- **python-docx**: Word document processing
|
||||
- **pdfplumber**: Advanced PDF text extraction
|
||||
|
||||
## Development Environment
|
||||
|
||||
### Container-Based Development
|
||||
The application is designed for container-based development with hot-reload capabilities:
|
||||
- Source code mounted as volumes for real-time updates
|
||||
- All services orchestrated via Docker Compose
|
||||
- Development and production parity
|
||||
|
||||
### Key Features
|
||||
- **Real-time Processing**: Immediate file detection and processing
|
||||
- **Horizontal Scaling**: Multiple workers can be added easily
|
||||
- **Fault Tolerance**: Celery provides automatic retry mechanisms
|
||||
- **Monitoring**: Built-in task status tracking
|
||||
- **Hot Reload**: Development changes reflected instantly in containers
|
||||
|
||||
### Docker Services
|
||||
1. **file-processor**: FastAPI + real-time file monitoring + Celery task dispatch
|
||||
2. **worker**: Celery workers for document processing (OCR, text extraction)
|
||||
3. **redis**: Message broker for Celery tasks
|
||||
4. **mongodb**: Final database for processing results
|
||||
5. **frontend**: React interface for monitoring and file access
|
||||
|
||||
## Project Structure (To be implemented)
|
||||
|
||||
MyDocManager/
|
||||
├── docker-compose.yml
|
||||
├── src/
|
||||
│ ├── file-processor/
|
||||
│ │ ├── Dockerfile
|
||||
│ │ ├── requirements.txt
|
||||
│ │ ├── app/
|
||||
│ │ │ ├── main.py
|
||||
│ │ │ ├── file_watcher.py
|
||||
│ │ │ ├── celery_app.py
|
||||
│ │ │ └── api/
|
||||
│ ├── worker/
|
||||
│ │ ├── Dockerfile
|
||||
│ │ ├── requirements.txt
|
||||
│ │ └── tasks/
|
||||
│ └── frontend/
|
||||
│ ├── Dockerfile
|
||||
│ ├── package.json
|
||||
│ └── src/
|
||||
├── tests/
|
||||
│ ├── file-processor/
|
||||
│ └── worker/
|
||||
├── volumes/
|
||||
│ └── watched_files/
|
||||
└── README.md
|
||||
|
||||
|
||||
## Docker Commands Reference
|
||||
|
||||
### Initial Setup & Build
|
||||
|
||||
```bash
|
||||
# Build and start all services (first time)
|
||||
docker-compose up --build
|
||||
|
||||
# Build and start in background
|
||||
docker-compose up --build -d
|
||||
|
||||
# Build specific service
|
||||
docker-compose build file-processor
|
||||
docker-compose build worker
|
||||
```
|
||||
|
||||
### Development Workflow
|
||||
|
||||
```bash
|
||||
# Start all services
|
||||
docker-compose up
|
||||
|
||||
# Start in background (detached mode)
|
||||
docker-compose up -d
|
||||
|
||||
# Stop all services
|
||||
docker-compose down
|
||||
|
||||
# Stop and remove volumes (⚠️ deletes MongoDB data)
|
||||
docker-compose down -v
|
||||
|
||||
# Restart specific service
|
||||
docker-compose restart file-processor
|
||||
docker-compose restart worker
|
||||
docker-compose restart redis
|
||||
docker-compose restart mongodb
|
||||
```
|
||||
|
||||
### Monitoring & Debugging
|
||||
|
||||
```bash
|
||||
# View logs of all services
|
||||
docker-compose logs
|
||||
|
||||
# View logs of specific service
|
||||
docker-compose logs file-processor
|
||||
docker-compose logs worker
|
||||
docker-compose logs redis
|
||||
docker-compose logs mongodb
|
||||
|
||||
# Follow logs in real-time
|
||||
docker-compose logs -f
|
||||
docker-compose logs -f worker
|
||||
|
||||
# View running containers
|
||||
docker-compose ps
|
||||
|
||||
# Execute command in running container
|
||||
docker-compose exec file-processor bash
|
||||
docker-compose exec worker bash
|
||||
docker-compose exec mongodb mongosh
|
||||
```
|
||||
|
||||
### Service Management
|
||||
|
||||
```bash
|
||||
# Start only specific services
|
||||
docker-compose up redis mongodb file-processor
|
||||
|
||||
# Stop specific service
|
||||
docker-compose stop worker
|
||||
docker-compose stop file-processor
|
||||
|
||||
# Remove stopped containers
|
||||
docker-compose rm
|
||||
|
||||
# Scale workers (multiple instances)
|
||||
docker-compose up --scale worker=3
|
||||
```
|
||||
|
||||
### Hot-Reload Configuration
|
||||
|
||||
- **file-processor**: Hot-reload enabled via `--reload` flag
|
||||
- Code changes in `src/file-processor/app/` automatically restart FastAPI
|
||||
- **worker**: No hot-reload (manual restart required for stability)
|
||||
- Code changes in `src/worker/tasks/` require: `docker-compose restart worker`
|
||||
|
||||
### Useful Service URLs
|
||||
|
||||
- **FastAPI API**: http://localhost:8000
|
||||
- **FastAPI Docs**: http://localhost:8000/docs
|
||||
- **Health Check**: http://localhost:8000/health
|
||||
- **Redis**: localhost:6379
|
||||
- **MongoDB**: localhost:27017
|
||||
|
||||
### Testing Commands
|
||||
|
||||
```bash
|
||||
# Test FastAPI health
|
||||
curl http://localhost:8000/health
|
||||
|
||||
# Test Celery task dispatch
|
||||
curl -X POST http://localhost:8000/test-task \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"message": "Hello from test!"}'
|
||||
|
||||
# Monitor Celery tasks
|
||||
docker-compose logs -f worker
|
||||
```
|
||||
|
||||
|
||||
## Key Implementation Notes
|
||||
|
||||
### Python Standards
|
||||
- **Style**: PEP 8 compliance
|
||||
- **Documentation**: Google/NumPy docstring format
|
||||
- **Naming**: snake_case for variables and functions
|
||||
- **Testing**: pytest with test_i_can_xxx / test_i_cannot_xxx patterns
|
||||
|
||||
### Dependencies Management
|
||||
- **Package Manager**: pip (standard)
|
||||
- **External Dependencies**: Listed in each service's requirements.txt
|
||||
- **Standard Library First**: Prefer standard library when possible
|
||||
|
||||
### Testing Strategy
|
||||
- All code must be testable
|
||||
- Unit tests for each processing function
|
||||
- Integration tests for file processing workflow
|
||||
- Tests validated before implementation
|
||||
|
||||
### Critical Architecture Decisions Made
|
||||
1. **Option Selected**: Single FastAPI service handles both API and file watching
|
||||
2. **Celery with Redis**: Chosen over other async patterns for scalability
|
||||
3. **EasyOCR Preferred**: Selected over Tesseract for modern OCR needs
|
||||
4. **Container Development**: Hot-reload setup required for development workflow
|
||||
|
||||
### Development Process Requirements
|
||||
1. **Collaborative Validation**: All options must be explained before coding
|
||||
2. **Test-First Approach**: Test cases defined and validated before implementation
|
||||
3. **Incremental Development**: Start simple, extend functionality progressively
|
||||
4. **Error Handling**: Clear problem explanation required before proposing fixes
|
||||
|
||||
### Next Implementation Steps
|
||||
1. Create docker-compose.yml with all services
|
||||
2. Implement basic FastAPI service structure
|
||||
3. Add watchdog file monitoring
|
||||
4. Create Celery task structure
|
||||
5. Implement document processing tasks
|
||||
6. Build React monitoring interface
|
||||
|
||||
"""
|
||||
73
docker-compose.yml
Normal file
73
docker-compose.yml
Normal file
@@ -0,0 +1,73 @@
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
# Redis - Message broker for Celery
|
||||
redis:
|
||||
image: redis:8-alpine
|
||||
container_name: mydocmanager-redis
|
||||
ports:
|
||||
- "6379:6379"
|
||||
networks:
|
||||
- mydocmanager-network
|
||||
|
||||
# MongoDB - Final database for results
|
||||
mongodb:
|
||||
image: mongo:7
|
||||
container_name: mydocmanager-mongodb
|
||||
ports:
|
||||
- "27017:27017"
|
||||
environment:
|
||||
MONGO_INITDB_ROOT_USERNAME: admin
|
||||
MONGO_INITDB_ROOT_PASSWORD: password123
|
||||
MONGO_INITDB_DATABASE: mydocmanager
|
||||
volumes:
|
||||
- mongodb-data:/data/db
|
||||
networks:
|
||||
- mydocmanager-network
|
||||
|
||||
# File Processor - FastAPI + file monitoring + Celery task dispatch
|
||||
file-processor:
|
||||
build:
|
||||
context: ./src/file-processor
|
||||
dockerfile: Dockerfile
|
||||
container_name: mydocmanager-file-processor
|
||||
ports:
|
||||
- "8000:8000"
|
||||
environment:
|
||||
- REDIS_URL=redis://redis:6379/0
|
||||
- MONGODB_URL=mongodb://admin:password123@mongodb:27017/mydocmanager?authSource=admin
|
||||
volumes:
|
||||
- ./src/file-processor/app:/app
|
||||
- ./volumes/watched_files:/watched_files
|
||||
depends_on:
|
||||
- redis
|
||||
- mongodb
|
||||
networks:
|
||||
- mydocmanager-network
|
||||
command: uvicorn main:app --host 0.0.0.0 --port 8000 --reload
|
||||
|
||||
# Worker - Celery workers for document processing
|
||||
worker:
|
||||
build:
|
||||
context: ./src/worker
|
||||
dockerfile: Dockerfile
|
||||
container_name: mydocmanager-worker
|
||||
environment:
|
||||
- REDIS_URL=redis://redis:6379/0
|
||||
- MONGODB_URL=mongodb://admin:password123@mongodb:27017/mydocmanager?authSource=admin
|
||||
volumes:
|
||||
- ./src/worker/tasks:/app
|
||||
- ./volumes/watched_files:/watched_files
|
||||
depends_on:
|
||||
- redis
|
||||
- mongodb
|
||||
networks:
|
||||
- mydocmanager-network
|
||||
command: celery -A main worker --loglevel=info
|
||||
|
||||
volumes:
|
||||
mongodb-data:
|
||||
|
||||
networks:
|
||||
mydocmanager-network:
|
||||
driver: bridge
|
||||
7
main.py
Normal file
7
main.py
Normal file
@@ -0,0 +1,7 @@
|
||||
|
||||
def main() -> None:
    """Print a greeting to standard output."""
    print("Hello world!")


if __name__ == "__main__":
    main()
|
||||
33
requirements.txt
Normal file
33
requirements.txt
Normal file
@@ -0,0 +1,33 @@
|
||||
amqp==5.3.1
|
||||
annotated-types==0.7.0
|
||||
anyio==4.10.0
|
||||
billiard==4.2.1
|
||||
celery==5.5.3
|
||||
click==8.2.1
|
||||
click-didyoumean==0.3.1
|
||||
click-plugins==1.1.1.2
|
||||
click-repl==0.3.0
|
||||
fastapi==0.116.1
|
||||
h11==0.16.0
|
||||
httptools==0.6.4
|
||||
idna==3.10
|
||||
kombu==5.5.4
|
||||
packaging==25.0
|
||||
prompt_toolkit==3.0.52
|
||||
pydantic==2.11.9
|
||||
pydantic_core==2.33.2
|
||||
python-dateutil==2.9.0.post0
|
||||
python-dotenv==1.1.1
|
||||
PyYAML==6.0.2
|
||||
six==1.17.0
|
||||
sniffio==1.3.1
|
||||
starlette==0.47.3
|
||||
typing-inspection==0.4.1
|
||||
typing_extensions==4.15.0
|
||||
tzdata==2025.2
|
||||
uvicorn==0.35.0
|
||||
uvloop==0.21.0
|
||||
vine==5.1.0
|
||||
watchfiles==1.1.0
|
||||
wcwidth==0.2.13
|
||||
websockets==15.0.1
|
||||
0
src/__init__.py
Normal file
0
src/__init__.py
Normal file
17
src/file-processor/Dockerfile
Normal file
17
src/file-processor/Dockerfile
Normal file
@@ -0,0 +1,17 @@
|
||||
FROM python:3.12-slim
|
||||
|
||||
# Set working directory
|
||||
WORKDIR /app
|
||||
|
||||
# Copy requirements and install dependencies
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Copy application code
|
||||
COPY app/ .
|
||||
|
||||
# Expose port
|
||||
EXPOSE 8000
|
||||
|
||||
# Command will be overridden by docker-compose
|
||||
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
0
src/file-processor/__init__.py
Normal file
0
src/file-processor/__init__.py
Normal file
0
src/file-processor/app/__init__.py
Normal file
0
src/file-processor/app/__init__.py
Normal file
120
src/file-processor/app/main.py
Normal file
120
src/file-processor/app/main.py
Normal file
@@ -0,0 +1,120 @@
|
||||
"""
|
||||
FastAPI application for MyDocManager file processor service.
|
||||
|
||||
This service provides API endpoints for health checks and task dispatching.
|
||||
"""
|
||||
|
||||
import os
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from pydantic import BaseModel
|
||||
import redis
|
||||
from celery import Celery
|
||||
|
||||
# Connection settings are read from the environment; the fallbacks point at
# services on localhost.
REDIS_URL = os.environ.get("REDIS_URL", "redis://localhost:6379/0")
# NOTE(review): MONGODB_URL is not referenced by the rest of this module as
# shown -- confirm whether a MongoDB client is still to be wired in.
MONGODB_URL = os.environ.get("MONGODB_URL", "mongodb://localhost:27017")

# FastAPI application object that the route decorators below attach to.
app = FastAPI(
    title="MyDocManager File Processor",
    description="File processing and task dispatch service",
    version="1.0.0",
)

# Redis client used by the health check. It stays None (with a printed
# warning) when building the client raises.
# NOTE(review): from_url presumably does not open a connection eagerly, so a
# down server may only surface at the first command -- confirm.
redis_client = None
try:
    redis_client = redis.from_url(REDIS_URL)
except Exception as e:
    print(f"Warning: Could not connect to Redis: {e}")

# Celery handle used to send tasks; Redis is both broker and result backend.
celery_app = Celery("file_processor", broker=REDIS_URL, backend=REDIS_URL)
|
||||
|
||||
|
||||
# Pydantic models
|
||||
class TestTaskRequest(BaseModel):
    """Request body accepted by the test-task endpoint."""

    # Text passed to the dispatched test task as its only argument.
    message: str
|
||||
|
||||
|
||||
@app.get("/health")
async def health_check() -> dict:
    """
    Health check endpoint.

    The service is reported as "healthy" unless Redis is unavailable, in
    which case the overall status is "degraded". Redis counts as unavailable
    when no client could be created at import time or when a ping fails.
    MongoDB is not probed here and is always reported as "unknown".

    Returns:
        dict: Service health status with dependencies
    """
    dependencies = {"redis": "unknown", "mongodb": "unknown"}
    health_status = {
        "status": "healthy",
        "service": "file-processor",
        "dependencies": dependencies,
    }

    # A missing client means Redis is unusable, so it must not be reported
    # as healthy/unknown.
    if redis_client is None:
        redis_reachable = False
    else:
        try:
            # NOTE(review): this looks like a synchronous call inside an
            # async handler -- confirm it cannot stall the event loop.
            redis_client.ping()
            redis_reachable = True
        except Exception:
            # Best effort: any ping failure is reported, never raised.
            redis_reachable = False

    if redis_reachable:
        dependencies["redis"] = "connected"
    else:
        dependencies["redis"] = "disconnected"
        health_status["status"] = "degraded"

    return health_status
|
||||
|
||||
|
||||
@app.post("/test-task")
async def dispatch_test_task(request: TestTaskRequest) -> dict:
    """
    Dispatch a test task to Celery worker.

    The task is sent by name ("main.test_task") with the request message as
    its single positional argument.

    Args:
        request: Test task request containing message

    Returns:
        dict: Task dispatch information ("status", "task_id", "message")

    Raises:
        HTTPException: With status 500 if task dispatch fails; the original
            error is attached as the exception's cause.
    """
    try:
        # Only the dispatch itself is guarded, so unrelated errors are not
        # mislabelled as dispatch failures.
        task = celery_app.send_task(
            "main.test_task",
            args=[request.message],
        )
    except Exception as e:
        # Chain the cause so the original traceback is kept for debugging.
        raise HTTPException(
            status_code=500,
            detail=f"Failed to dispatch task: {str(e)}",
        ) from e

    return {
        "status": "dispatched",
        "task_id": task.id,
        "message": f"Test task dispatched with message: {request.message}",
    }
|
||||
|
||||
|
||||
@app.get("/")
async def root():
    """
    Describe the running service.

    Returns:
        dict: Service name, version string and a fixed "running" status
    """
    service_info = {
        "service": "MyDocManager File Processor",
        "version": "1.0.0",
        "status": "running",
    }
    return service_info
|
||||
6
src/file-processor/requirements.txt
Normal file
6
src/file-processor/requirements.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
fastapi==0.116.1
|
||||
uvicorn==0.35.0
|
||||
celery==5.5.3
|
||||
redis==6.4.0
|
||||
pymongo==4.15.0
|
||||
pydantic==2.11.9
|
||||
14
src/worker/Dockerfile
Normal file
14
src/worker/Dockerfile
Normal file
@@ -0,0 +1,14 @@
|
||||
FROM python:3.12-slim
|
||||
|
||||
# Set working directory
|
||||
WORKDIR /app
|
||||
|
||||
# Copy requirements and install dependencies
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Copy application code
|
||||
COPY tasks/ .
|
||||
|
||||
# Command will be overridden by docker-compose
|
||||
CMD ["celery", "-A", "main", "worker", "--loglevel=info"]
|
||||
0
src/worker/__init__.py
Normal file
0
src/worker/__init__.py
Normal file
4
src/worker/requirements.txt
Normal file
4
src/worker/requirements.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
|
||||
celery==5.5.3
|
||||
redis==6.4.0
|
||||
pymongo==4.15.0
|
||||
0
src/worker/tasks/__init__.py
Normal file
0
src/worker/tasks/__init__.py
Normal file
113
src/worker/tasks/main.py
Normal file
113
src/worker/tasks/main.py
Normal file
@@ -0,0 +1,113 @@
|
||||
"""
|
||||
Celery worker for MyDocManager document processing tasks.
|
||||
|
||||
This module contains all Celery tasks for processing documents.
|
||||
"""
|
||||
|
||||
import os
|
||||
import time
|
||||
from celery import Celery
|
||||
|
||||
# Broker/result-store and database locations, overridable via the environment.
REDIS_URL = os.environ.get("REDIS_URL", "redis://localhost:6379/0")
# NOTE(review): MONGODB_URL is not used by the tasks shown in this module --
# confirm it is reserved for the real document-processing implementation.
MONGODB_URL = os.environ.get("MONGODB_URL", "mongodb://localhost:27017")

# Celery application; Redis is both the broker and the result backend.
app = Celery("mydocmanager_worker", broker=REDIS_URL, backend=REDIS_URL)

# JSON-only payloads, UTC timestamps, started-state tracking and time limits.
app.conf.update(
    accept_content=["json"],
    task_serializer="json",
    result_serializer="json",
    enable_utc=True,
    timezone="UTC",
    task_track_started=True,
    task_soft_time_limit=4 * 60,  # 4 minutes, in seconds
    task_time_limit=5 * 60,  # 5 minutes, in seconds
)
|
||||
|
||||
|
||||
@app.task(bind=True)
def test_task(self, message: str):
    """
    Run a five-step dummy job to check that the worker is functioning.

    Each step prints a log line, sleeps one second and then publishes a
    "PROGRESS" state with the current step, the total and a short message.

    Args:
        message: Test message to process

    Returns:
        dict: "status", "message", "processed_at" (time.time() value) and
            "worker_id" (taken from self.request.id)

    Raises:
        Whatever self.retry(exc=..., countdown=60, max_retries=3) raises
        when any exception occurs while processing.
    """
    try:
        print(f"[WORKER] Starting test task with message: {message}")

        # Simulated workload: log, wait, then report progress for each step.
        for i in range(5):
            print(f"[WORKER] Processing step {i + 1}/5...")
            time.sleep(1)

            progress = {
                "current": i + 1,
                "total": 5,
                "message": f"Processing step {i + 1}",
            }
            self.update_state(state="PROGRESS", meta=progress)

        # NOTE(review): self.request.id looks like the task id rather than a
        # worker identifier -- confirm the "worker_id" key is intended.
        result = {
            "status": "completed",
            "message": f"Successfully processed: {message}",
            "processed_at": time.time(),
            "worker_id": self.request.id,
        }
        print(f"[WORKER] Test task completed successfully: {result}")
        return result

    except Exception as exc:
        print(f"[WORKER] Test task failed: {str(exc)}")
        raise self.retry(exc=exc, countdown=60, max_retries=3)
|
||||
|
||||
|
||||
@app.task(bind=True)
def process_document_task(self, file_path: str):
    """
    Stand-in for real document processing.

    No file is read: the task logs, sleeps two seconds and returns
    placeholder content derived from the path.

    Args:
        file_path: Path to the document to process

    Returns:
        dict: "status", "file_path", "processed_at" (time.time() value),
            "content" (placeholder text) and "worker_id" (taken from
            self.request.id)

    Raises:
        Whatever self.retry(exc=..., countdown=60, max_retries=3) raises
        when any exception occurs while processing.
    """
    try:
        print(f"[WORKER] Starting document processing for: {file_path}")

        # Simulated processing time until the real logic exists.
        time.sleep(2)

        result = {
            "status": "completed",
            "file_path": file_path,
            "processed_at": time.time(),
            "content": f"Placeholder content for {file_path}",
            "worker_id": self.request.id,
        }
        print(f"[WORKER] Document processing completed: {file_path}")
        return result

    except Exception as exc:
        print(f"[WORKER] Document processing failed for {file_path}: {str(exc)}")
        raise self.retry(exc=exc, countdown=60, max_retries=3)
|
||||
|
||||
|
||||
# Start the Celery application when this module is executed as a script.
if __name__ == "__main__":
    app.start()
|
||||
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
Reference in New Issue
Block a user