First commit. Docker Compose is working
This commit is contained in:
216
.gitignore
vendored
Normal file
216
.gitignore
vendored
Normal file
@@ -0,0 +1,216 @@
|
|||||||
|
# Byte-compiled / optimized / DLL files
|
||||||
|
__pycache__/
|
||||||
|
*.py[codz]
|
||||||
|
*$py.class
|
||||||
|
|
||||||
|
# C extensions
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Distribution / packaging
|
||||||
|
.Python
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
wheels/
|
||||||
|
share/python-wheels/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
MANIFEST
|
||||||
|
|
||||||
|
# PyInstaller
|
||||||
|
# Usually these files are written by a python script from a template
|
||||||
|
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||||
|
*.manifest
|
||||||
|
*.spec
|
||||||
|
|
||||||
|
# Installer logs
|
||||||
|
pip-log.txt
|
||||||
|
pip-delete-this-directory.txt
|
||||||
|
|
||||||
|
# Unit test / coverage reports
|
||||||
|
htmlcov/
|
||||||
|
.tox/
|
||||||
|
.nox/
|
||||||
|
.coverage
|
||||||
|
.coverage.*
|
||||||
|
.cache
|
||||||
|
nosetests.xml
|
||||||
|
coverage.xml
|
||||||
|
*.cover
|
||||||
|
*.py.cover
|
||||||
|
.hypothesis/
|
||||||
|
.pytest_cache/
|
||||||
|
cover/
|
||||||
|
|
||||||
|
# Translations
|
||||||
|
*.mo
|
||||||
|
*.pot
|
||||||
|
|
||||||
|
# Django stuff:
|
||||||
|
*.log
|
||||||
|
local_settings.py
|
||||||
|
db.sqlite3
|
||||||
|
db.sqlite3-journal
|
||||||
|
|
||||||
|
# Flask stuff:
|
||||||
|
instance/
|
||||||
|
.webassets-cache
|
||||||
|
|
||||||
|
# Scrapy stuff:
|
||||||
|
.scrapy
|
||||||
|
|
||||||
|
# Sphinx documentation
|
||||||
|
docs/_build/
|
||||||
|
|
||||||
|
# PyBuilder
|
||||||
|
.pybuilder/
|
||||||
|
target/
|
||||||
|
|
||||||
|
# Jupyter Notebook
|
||||||
|
.ipynb_checkpoints
|
||||||
|
|
||||||
|
# IPython
|
||||||
|
profile_default/
|
||||||
|
ipython_config.py
|
||||||
|
|
||||||
|
# pyenv
|
||||||
|
# For a library or package, you might want to ignore these files since the code is
|
||||||
|
# intended to run in multiple environments; otherwise, check them in:
|
||||||
|
# .python-version
|
||||||
|
|
||||||
|
# pipenv
|
||||||
|
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||||
|
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||||
|
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||||
|
# install all needed dependencies.
|
||||||
|
# Pipfile.lock
|
||||||
|
|
||||||
|
# UV
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
||||||
|
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||||
|
# commonly ignored for libraries.
|
||||||
|
# uv.lock
|
||||||
|
|
||||||
|
# poetry
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||||
|
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||||
|
# commonly ignored for libraries.
|
||||||
|
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||||
|
# poetry.lock
|
||||||
|
# poetry.toml
|
||||||
|
|
||||||
|
# pdm
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||||
|
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
||||||
|
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
||||||
|
# pdm.lock
|
||||||
|
# pdm.toml
|
||||||
|
.pdm-python
|
||||||
|
.pdm-build/
|
||||||
|
|
||||||
|
# pixi
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
||||||
|
# pixi.lock
|
||||||
|
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
||||||
|
# in the .venv directory. It is recommended not to include this directory in version control.
|
||||||
|
.pixi
|
||||||
|
|
||||||
|
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||||
|
__pypackages__/
|
||||||
|
|
||||||
|
# Celery stuff
|
||||||
|
celerybeat-schedule
|
||||||
|
celerybeat.pid
|
||||||
|
|
||||||
|
# Redis
|
||||||
|
*.rdb
|
||||||
|
*.aof
|
||||||
|
*.pid
|
||||||
|
|
||||||
|
# RabbitMQ
|
||||||
|
mnesia/
|
||||||
|
rabbitmq/
|
||||||
|
rabbitmq-data/
|
||||||
|
|
||||||
|
# ActiveMQ
|
||||||
|
activemq-data/
|
||||||
|
|
||||||
|
# SageMath parsed files
|
||||||
|
*.sage.py
|
||||||
|
|
||||||
|
# Environments
|
||||||
|
.env
|
||||||
|
.envrc
|
||||||
|
.venv
|
||||||
|
env/
|
||||||
|
venv/
|
||||||
|
ENV/
|
||||||
|
env.bak/
|
||||||
|
venv.bak/
|
||||||
|
|
||||||
|
# Spyder project settings
|
||||||
|
.spyderproject
|
||||||
|
.spyproject
|
||||||
|
|
||||||
|
# Rope project settings
|
||||||
|
.ropeproject
|
||||||
|
|
||||||
|
# mkdocs documentation
|
||||||
|
/site
|
||||||
|
|
||||||
|
# mypy
|
||||||
|
.mypy_cache/
|
||||||
|
.dmypy.json
|
||||||
|
dmypy.json
|
||||||
|
|
||||||
|
# Pyre type checker
|
||||||
|
.pyre/
|
||||||
|
|
||||||
|
# pytype static type analyzer
|
||||||
|
.pytype/
|
||||||
|
|
||||||
|
# Cython debug symbols
|
||||||
|
cython_debug/
|
||||||
|
|
||||||
|
# PyCharm
|
||||||
|
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||||
|
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||||
|
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||||
|
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||||
|
.idea/
|
||||||
|
|
||||||
|
# Abstra
|
||||||
|
# Abstra is an AI-powered process automation framework.
|
||||||
|
# Ignore directories containing user credentials, local state, and settings.
|
||||||
|
# Learn more at https://abstra.io/docs
|
||||||
|
.abstra/
|
||||||
|
|
||||||
|
# Visual Studio Code
|
||||||
|
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
||||||
|
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
||||||
|
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
||||||
|
# you could uncomment the following to ignore the entire vscode folder
|
||||||
|
# .vscode/
|
||||||
|
|
||||||
|
# Ruff stuff:
|
||||||
|
.ruff_cache/
|
||||||
|
|
||||||
|
# PyPI configuration file
|
||||||
|
.pypirc
|
||||||
|
|
||||||
|
# Marimo
|
||||||
|
marimo/_static/
|
||||||
|
marimo/_lsp/
|
||||||
|
__marimo__/
|
||||||
|
|
||||||
|
# Streamlit
|
||||||
|
.streamlit/secrets.toml
|
||||||
255
Readme.md
Normal file
255
Readme.md
Normal file
@@ -0,0 +1,255 @@
|
|||||||
|
# MyDocManager
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
MyDocManager is a real-time document processing application that automatically detects files in a monitored directory, processes them asynchronously, and stores the results in a database. The application uses a modern microservices architecture with Redis for task queuing and MongoDB for data persistence.
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
### Technology Stack
|
||||||
|
- **Backend API**: FastAPI (Python 3.12)
|
||||||
|
- **Task Processing**: Celery with Redis broker
|
||||||
|
- **Document Processing**: EasyOCR, PyMuPDF, python-docx, pdfplumber
|
||||||
|
- **Database**: MongoDB
|
||||||
|
- **Frontend**: React
|
||||||
|
- **Containerization**: Docker & Docker Compose
|
||||||
|
- **File Monitoring**: Python watchdog library
|
||||||
|
|
||||||
|
### Services Architecture
|
||||||
|
┌─────────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐
|
||||||
|
│ Frontend │ │ file- │ │ Redis │ │ Worker │ │ MongoDB │
|
||||||
|
│ (React) │◄──►│ processor │───►│ (Broker) │◄──►│ (Celery) │───►│ (Results) │
|
||||||
|
│ │ │ (FastAPI + │ │ │ │ │ │ │
|
||||||
|
│ │ │ watchdog) │ │ │ │ │ │ │
|
||||||
|
└─────────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘
|
||||||
|
|
||||||
|
### Docker Services
|
||||||
|
1. **file-processor**: FastAPI + real-time file monitoring + Celery task dispatch
|
||||||
|
2. **worker**: Celery workers for document processing (OCR, text extraction)
|
||||||
|
3. **redis**: Message broker for Celery tasks
|
||||||
|
4. **mongodb**: Final database for processing results
|
||||||
|
5. **frontend**: React interface for monitoring and file access
|
||||||
|
|
||||||
|
## Data Flow
|
||||||
|
|
||||||
|
1. **File Detection**: Watchdog monitors target directory in real-time
|
||||||
|
2. **Task Creation**: FastAPI creates Celery task for each detected file
|
||||||
|
3. **Task Processing**: Worker processes document (OCR, text extraction)
|
||||||
|
4. **Result Storage**: Processed data stored in MongoDB
|
||||||
|
5. **Monitoring**: React frontend displays processing status and results
|
||||||
|
|
||||||
|
## Document Processing Capabilities
|
||||||
|
|
||||||
|
### Supported File Types
|
||||||
|
- **PDF**: Direct text extraction + OCR for scanned documents
|
||||||
|
- **Word Documents**: .docx text extraction
|
||||||
|
- **Images**: OCR text recognition (JPG, PNG, etc.)
|
||||||
|
|
||||||
|
### Processing Libraries
|
||||||
|
- **EasyOCR**: Modern OCR engine (80+ languages, deep learning-based)
|
||||||
|
- **PyMuPDF**: PDF text extraction and manipulation
|
||||||
|
- **python-docx**: Word document processing
|
||||||
|
- **pdfplumber**: Advanced PDF text extraction
|
||||||
|
|
||||||
|
## Development Environment
|
||||||
|
|
||||||
|
### Container-Based Development
|
||||||
|
The application is designed for container-based development with hot-reload capabilities:
|
||||||
|
- Source code mounted as volumes for real-time updates
|
||||||
|
- All services orchestrated via Docker Compose
|
||||||
|
- Development and production parity
|
||||||
|
|
||||||
|
### Key Features
|
||||||
|
- **Real-time Processing**: Immediate file detection and processing
|
||||||
|
- **Horizontal Scaling**: Multiple workers can be added easily
|
||||||
|
- **Fault Tolerance**: Celery provides automatic retry mechanisms
|
||||||
|
- **Monitoring**: Built-in task status tracking
|
||||||
|
- **Hot Reload**: Development changes reflected instantly in containers
|
||||||
|
|
||||||
|
### Docker Services
|
||||||
|
1. **file-processor**: FastAPI + real-time file monitoring + Celery task dispatch
|
||||||
|
2. **worker**: Celery workers for document processing (OCR, text extraction)
|
||||||
|
3. **redis**: Message broker for Celery tasks
|
||||||
|
4. **mongodb**: Final database for processing results
|
||||||
|
5. **frontend**: React interface for monitoring and file access
|
||||||
|
|
||||||
|
## Project Structure (To be implemented)
|
||||||
|
|
||||||
|
MyDocManager/
|
||||||
|
├── docker-compose.yml
|
||||||
|
├── src/
|
||||||
|
│ ├── file-processor/
|
||||||
|
│ │ ├── Dockerfile
|
||||||
|
│ │ ├── requirements.txt
|
||||||
|
│ │ ├── app/
|
||||||
|
│ │ │ ├── main.py
|
||||||
|
│ │ │ ├── file_watcher.py
|
||||||
|
│ │ │ ├── celery_app.py
|
||||||
|
│ │ │ └── api/
|
||||||
|
│ ├── worker/
|
||||||
|
│ │ ├── Dockerfile
|
||||||
|
│ │ ├── requirements.txt
|
||||||
|
│ │ └── tasks/
|
||||||
|
│ └── frontend/
|
||||||
|
│ ├── Dockerfile
|
||||||
|
│ ├── package.json
|
||||||
|
│ └── src/
|
||||||
|
├── tests/
|
||||||
|
│ ├── file-processor/
|
||||||
|
│ └── worker/
|
||||||
|
├── volumes/
|
||||||
|
│ └── watched_files/
|
||||||
|
└── README.md
|
||||||
|
|
||||||
|
|
||||||
|
## Docker Commands Reference
|
||||||
|
|
||||||
|
### Initial Setup & Build
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Build and start all services (first time)
|
||||||
|
docker-compose up --build
|
||||||
|
|
||||||
|
# Build and start in background
|
||||||
|
docker-compose up --build -d
|
||||||
|
|
||||||
|
# Build specific service
|
||||||
|
docker-compose build file-processor
|
||||||
|
docker-compose build worker
|
||||||
|
```
|
||||||
|
|
||||||
|
### Development Workflow
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Start all services
|
||||||
|
docker-compose up
|
||||||
|
|
||||||
|
# Start in background (detached mode)
|
||||||
|
docker-compose up -d
|
||||||
|
|
||||||
|
# Stop all services
|
||||||
|
docker-compose down
|
||||||
|
|
||||||
|
# Stop and remove volumes (⚠️ deletes MongoDB data)
|
||||||
|
docker-compose down -v
|
||||||
|
|
||||||
|
# Restart specific service
|
||||||
|
docker-compose restart file-processor
|
||||||
|
docker-compose restart worker
|
||||||
|
docker-compose restart redis
|
||||||
|
docker-compose restart mongodb
|
||||||
|
```
|
||||||
|
|
||||||
|
### Monitoring & Debugging
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# View logs of all services
|
||||||
|
docker-compose logs
|
||||||
|
|
||||||
|
# View logs of specific service
|
||||||
|
docker-compose logs file-processor
|
||||||
|
docker-compose logs worker
|
||||||
|
docker-compose logs redis
|
||||||
|
docker-compose logs mongodb
|
||||||
|
|
||||||
|
# Follow logs in real-time
|
||||||
|
docker-compose logs -f
|
||||||
|
docker-compose logs -f worker
|
||||||
|
|
||||||
|
# View running containers
|
||||||
|
docker-compose ps
|
||||||
|
|
||||||
|
# Execute command in running container
|
||||||
|
docker-compose exec file-processor bash
|
||||||
|
docker-compose exec worker bash
|
||||||
|
docker-compose exec mongodb mongosh
|
||||||
|
```
|
||||||
|
|
||||||
|
### Service Management
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Start only specific services
|
||||||
|
docker-compose up redis mongodb file-processor
|
||||||
|
|
||||||
|
# Stop specific service
|
||||||
|
docker-compose stop worker
|
||||||
|
docker-compose stop file-processor
|
||||||
|
|
||||||
|
# Remove stopped containers
|
||||||
|
docker-compose rm
|
||||||
|
|
||||||
|
# Scale workers (multiple instances)
|
||||||
|
docker-compose up --scale worker=3
|
||||||
|
```
|
||||||
|
|
||||||
|
### Hot-Reload Configuration
|
||||||
|
|
||||||
|
- **file-processor**: Hot-reload enabled via `--reload` flag
|
||||||
|
- Code changes in `src/file-processor/app/` automatically restart FastAPI
|
||||||
|
- **worker**: No hot-reload (manual restart required for stability)
|
||||||
|
- Code changes in `src/worker/tasks/` require: `docker-compose restart worker`
|
||||||
|
|
||||||
|
### Useful Service URLs
|
||||||
|
|
||||||
|
- **FastAPI API**: http://localhost:8000
|
||||||
|
- **FastAPI Docs**: http://localhost:8000/docs
|
||||||
|
- **Health Check**: http://localhost:8000/health
|
||||||
|
- **Redis**: localhost:6379
|
||||||
|
- **MongoDB**: localhost:27017
|
||||||
|
|
||||||
|
### Testing Commands
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Test FastAPI health
|
||||||
|
curl http://localhost:8000/health
|
||||||
|
|
||||||
|
# Test Celery task dispatch
|
||||||
|
curl -X POST http://localhost:8000/test-task \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"message": "Hello from test!"}'
|
||||||
|
|
||||||
|
# Monitor Celery tasks
|
||||||
|
docker-compose logs -f worker
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Key Implementation Notes
|
||||||
|
|
||||||
|
### Python Standards
|
||||||
|
- **Style**: PEP 8 compliance
|
||||||
|
- **Documentation**: Google/NumPy docstring format
|
||||||
|
- **Naming**: snake_case for variables and functions
|
||||||
|
- **Testing**: pytest with test_i_can_xxx / test_i_cannot_xxx patterns
|
||||||
|
|
||||||
|
### Dependencies Management
|
||||||
|
- **Package Manager**: pip (standard)
|
||||||
|
- **External Dependencies**: Listed in each service's requirements.txt
|
||||||
|
- **Standard Library First**: Prefer standard library when possible
|
||||||
|
|
||||||
|
### Testing Strategy
|
||||||
|
- All code must be testable
|
||||||
|
- Unit tests for each processing function
|
||||||
|
- Integration tests for file processing workflow
|
||||||
|
- Tests validated before implementation
|
||||||
|
|
||||||
|
### Critical Architecture Decisions Made
|
||||||
|
1. **Option Selected**: Single FastAPI service handles both API and file watching
|
||||||
|
2. **Celery with Redis**: Chosen over other async patterns for scalability
|
||||||
|
3. **EasyOCR Preferred**: Selected over Tesseract for modern OCR needs
|
||||||
|
4. **Container Development**: Hot-reload setup required for development workflow
|
||||||
|
|
||||||
|
### Development Process Requirements
|
||||||
|
1. **Collaborative Validation**: All options must be explained before coding
|
||||||
|
2. **Test-First Approach**: Test cases defined and validated before implementation
|
||||||
|
3. **Incremental Development**: Start simple, extend functionality progressively
|
||||||
|
4. **Error Handling**: Clear problem explanation required before proposing fixes
|
||||||
|
|
||||||
|
### Next Implementation Steps
|
||||||
|
1. Create docker-compose.yml with all services
|
||||||
|
2. Implement basic FastAPI service structure
|
||||||
|
3. Add watchdog file monitoring
|
||||||
|
4. Create Celery task structure
|
||||||
|
5. Implement document processing tasks
|
||||||
|
6. Build React monitoring interface
|
||||||
|
|
||||||
|
"""
|
||||||
73
docker-compose.yml
Normal file
73
docker-compose.yml
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
version: '3.8'
|
||||||
|
|
||||||
|
services:
|
||||||
|
# Redis - Message broker for Celery
|
||||||
|
redis:
|
||||||
|
image: redis:8-alpine
|
||||||
|
container_name: mydocmanager-redis
|
||||||
|
ports:
|
||||||
|
- "6379:6379"
|
||||||
|
networks:
|
||||||
|
- mydocmanager-network
|
||||||
|
|
||||||
|
# MongoDB - Final database for results
|
||||||
|
mongodb:
|
||||||
|
image: mongo:7
|
||||||
|
container_name: mydocmanager-mongodb
|
||||||
|
ports:
|
||||||
|
- "27017:27017"
|
||||||
|
environment:
|
||||||
|
MONGO_INITDB_ROOT_USERNAME: admin
|
||||||
|
MONGO_INITDB_ROOT_PASSWORD: password123
|
||||||
|
MONGO_INITDB_DATABASE: mydocmanager
|
||||||
|
volumes:
|
||||||
|
- mongodb-data:/data/db
|
||||||
|
networks:
|
||||||
|
- mydocmanager-network
|
||||||
|
|
||||||
|
# File Processor - FastAPI + file monitoring + Celery task dispatch
|
||||||
|
file-processor:
|
||||||
|
build:
|
||||||
|
context: ./src/file-processor
|
||||||
|
dockerfile: Dockerfile
|
||||||
|
container_name: mydocmanager-file-processor
|
||||||
|
ports:
|
||||||
|
- "8000:8000"
|
||||||
|
environment:
|
||||||
|
- REDIS_URL=redis://redis:6379/0
|
||||||
|
- MONGODB_URL=mongodb://admin:password123@mongodb:27017/mydocmanager?authSource=admin
|
||||||
|
volumes:
|
||||||
|
- ./src/file-processor/app:/app
|
||||||
|
- ./volumes/watched_files:/watched_files
|
||||||
|
depends_on:
|
||||||
|
- redis
|
||||||
|
- mongodb
|
||||||
|
networks:
|
||||||
|
- mydocmanager-network
|
||||||
|
command: uvicorn main:app --host 0.0.0.0 --port 8000 --reload
|
||||||
|
|
||||||
|
# Worker - Celery workers for document processing
|
||||||
|
worker:
|
||||||
|
build:
|
||||||
|
context: ./src/worker
|
||||||
|
dockerfile: Dockerfile
|
||||||
|
container_name: mydocmanager-worker
|
||||||
|
environment:
|
||||||
|
- REDIS_URL=redis://redis:6379/0
|
||||||
|
- MONGODB_URL=mongodb://admin:password123@mongodb:27017/mydocmanager?authSource=admin
|
||||||
|
volumes:
|
||||||
|
- ./src/worker/tasks:/app
|
||||||
|
- ./volumes/watched_files:/watched_files
|
||||||
|
depends_on:
|
||||||
|
- redis
|
||||||
|
- mongodb
|
||||||
|
networks:
|
||||||
|
- mydocmanager-network
|
||||||
|
command: celery -A main worker --loglevel=info
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
mongodb-data:
|
||||||
|
|
||||||
|
networks:
|
||||||
|
mydocmanager-network:
|
||||||
|
driver: bridge
|
||||||
7
main.py
Normal file
7
main.py
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
|
||||||
|
def main():
    """Entry point: print a greeting to stdout."""
    # Fixed typo in the greeting ("Hello word !" -> "Hello world!").
    print("Hello world!")


if __name__ == "__main__":
    main()
|
||||||
33
requirements.txt
Normal file
33
requirements.txt
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
amqp==5.3.1
|
||||||
|
annotated-types==0.7.0
|
||||||
|
anyio==4.10.0
|
||||||
|
billiard==4.2.1
|
||||||
|
celery==5.5.3
|
||||||
|
click==8.2.1
|
||||||
|
click-didyoumean==0.3.1
|
||||||
|
click-plugins==1.1.1.2
|
||||||
|
click-repl==0.3.0
|
||||||
|
fastapi==0.116.1
|
||||||
|
h11==0.16.0
|
||||||
|
httptools==0.6.4
|
||||||
|
idna==3.10
|
||||||
|
kombu==5.5.4
|
||||||
|
packaging==25.0
|
||||||
|
prompt_toolkit==3.0.52
|
||||||
|
pydantic==2.11.9
|
||||||
|
pydantic_core==2.33.2
|
||||||
|
python-dateutil==2.9.0.post0
|
||||||
|
python-dotenv==1.1.1
|
||||||
|
PyYAML==6.0.2
|
||||||
|
six==1.17.0
|
||||||
|
sniffio==1.3.1
|
||||||
|
starlette==0.47.3
|
||||||
|
typing-inspection==0.4.1
|
||||||
|
typing_extensions==4.15.0
|
||||||
|
tzdata==2025.2
|
||||||
|
uvicorn==0.35.0
|
||||||
|
uvloop==0.21.0
|
||||||
|
vine==5.1.0
|
||||||
|
watchfiles==1.1.0
|
||||||
|
wcwidth==0.2.13
|
||||||
|
websockets==15.0.1
|
||||||
0
src/__init__.py
Normal file
0
src/__init__.py
Normal file
17
src/file-processor/Dockerfile
Normal file
17
src/file-processor/Dockerfile
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
FROM python:3.12-slim
|
||||||
|
|
||||||
|
# Set working directory
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Copy requirements and install dependencies
|
||||||
|
COPY requirements.txt .
|
||||||
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
|
# Copy application code
|
||||||
|
COPY app/ .
|
||||||
|
|
||||||
|
# Expose port
|
||||||
|
EXPOSE 8000
|
||||||
|
|
||||||
|
# Command will be overridden by docker-compose
|
||||||
|
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||||
0
src/file-processor/__init__.py
Normal file
0
src/file-processor/__init__.py
Normal file
0
src/file-processor/app/__init__.py
Normal file
0
src/file-processor/app/__init__.py
Normal file
120
src/file-processor/app/main.py
Normal file
120
src/file-processor/app/main.py
Normal file
@@ -0,0 +1,120 @@
|
|||||||
|
"""
|
||||||
|
FastAPI application for MyDocManager file processor service.
|
||||||
|
|
||||||
|
This service provides API endpoints for health checks and task dispatching.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from fastapi import FastAPI, HTTPException
|
||||||
|
from pydantic import BaseModel
|
||||||
|
import redis
|
||||||
|
from celery import Celery
|
||||||
|
|
||||||
|
# Initialize FastAPI app
|
||||||
|
app = FastAPI(
|
||||||
|
title="MyDocManager File Processor",
|
||||||
|
description="File processing and task dispatch service",
|
||||||
|
version="1.0.0"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Environment variables
|
||||||
|
REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/0")
|
||||||
|
MONGODB_URL = os.getenv("MONGODB_URL", "mongodb://localhost:27017")
|
||||||
|
|
||||||
|
# Initialize Redis client
|
||||||
|
try:
|
||||||
|
redis_client = redis.from_url(REDIS_URL)
|
||||||
|
except Exception as e:
|
||||||
|
redis_client = None
|
||||||
|
print(f"Warning: Could not connect to Redis: {e}")
|
||||||
|
|
||||||
|
# Initialize Celery
|
||||||
|
celery_app = Celery(
|
||||||
|
"file_processor",
|
||||||
|
broker=REDIS_URL,
|
||||||
|
backend=REDIS_URL
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Pydantic models
class TestTaskRequest(BaseModel):
    """Request body for the /test-task endpoint.

    Validated by pydantic; requests missing `message` are rejected
    with a 422 by FastAPI before the handler runs.
    """
    # Free-form text forwarded as the argument of the worker's test task.
    message: str
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/health")
|
||||||
|
async def health_check():
|
||||||
|
"""
|
||||||
|
Health check endpoint.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
dict: Service health status with dependencies
|
||||||
|
"""
|
||||||
|
health_status = {
|
||||||
|
"status": "healthy",
|
||||||
|
"service": "file-processor",
|
||||||
|
"dependencies": {
|
||||||
|
"redis": "unknown",
|
||||||
|
"mongodb": "unknown"
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
# Check Redis connection
|
||||||
|
if redis_client:
|
||||||
|
try:
|
||||||
|
redis_client.ping()
|
||||||
|
health_status["dependencies"]["redis"] = "connected"
|
||||||
|
except Exception:
|
||||||
|
health_status["dependencies"]["redis"] = "disconnected"
|
||||||
|
health_status["status"] = "degraded"
|
||||||
|
|
||||||
|
return health_status
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/test-task")
|
||||||
|
async def dispatch_test_task(request: TestTaskRequest):
|
||||||
|
"""
|
||||||
|
Dispatch a test task to Celery worker.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
request: Test task request containing message
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
dict: Task dispatch information
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
HTTPException: If task dispatch fails
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Send task to worker
|
||||||
|
task = celery_app.send_task(
|
||||||
|
"main.test_task",
|
||||||
|
args=[request.message]
|
||||||
|
)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"status": "dispatched",
|
||||||
|
"task_id": task.id,
|
||||||
|
"message": f"Test task dispatched with message: {request.message}"
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=500,
|
||||||
|
detail=f"Failed to dispatch task: {str(e)}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/")
|
||||||
|
async def root():
|
||||||
|
"""
|
||||||
|
Root endpoint.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
dict: Basic service information
|
||||||
|
"""
|
||||||
|
return {
|
||||||
|
"service": "MyDocManager File Processor",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"status": "running"
|
||||||
|
}
|
||||||
6
src/file-processor/requirements.txt
Normal file
6
src/file-processor/requirements.txt
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
fastapi==0.116.1
|
||||||
|
uvicorn==0.35.0
|
||||||
|
celery==5.5.3
|
||||||
|
redis==6.4.0
|
||||||
|
pymongo==4.15.0
|
||||||
|
pydantic==2.11.9
|
||||||
14
src/worker/Dockerfile
Normal file
14
src/worker/Dockerfile
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
FROM python:3.12-slim
|
||||||
|
|
||||||
|
# Set working directory
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Copy requirements and install dependencies
|
||||||
|
COPY requirements.txt .
|
||||||
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
|
# Copy application code
|
||||||
|
COPY tasks/ .
|
||||||
|
|
||||||
|
# Command will be overridden by docker-compose
|
||||||
|
CMD ["celery", "-A", "main", "worker", "--loglevel=info"]
|
||||||
0
src/worker/__init__.py
Normal file
0
src/worker/__init__.py
Normal file
4
src/worker/requirements.txt
Normal file
4
src/worker/requirements.txt
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
celery==5.5.3
|
||||||
|
redis==6.4.0
|
||||||
|
pymongo==4.15.0
|
||||||
0
src/worker/tasks/__init__.py
Normal file
0
src/worker/tasks/__init__.py
Normal file
113
src/worker/tasks/main.py
Normal file
113
src/worker/tasks/main.py
Normal file
@@ -0,0 +1,113 @@
|
|||||||
|
"""
|
||||||
|
Celery worker for MyDocManager document processing tasks.
|
||||||
|
|
||||||
|
This module contains all Celery tasks for processing documents.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from celery import Celery
|
||||||
|
|
||||||
|
# Environment variables
|
||||||
|
REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/0")
|
||||||
|
MONGODB_URL = os.getenv("MONGODB_URL", "mongodb://localhost:27017")
|
||||||
|
|
||||||
|
# Initialize Celery app
|
||||||
|
app = Celery(
|
||||||
|
"mydocmanager_worker",
|
||||||
|
broker=REDIS_URL,
|
||||||
|
backend=REDIS_URL
|
||||||
|
)
|
||||||
|
|
||||||
|
# Celery configuration
|
||||||
|
app.conf.update(
|
||||||
|
task_serializer="json",
|
||||||
|
accept_content=["json"],
|
||||||
|
result_serializer="json",
|
||||||
|
timezone="UTC",
|
||||||
|
enable_utc=True,
|
||||||
|
task_track_started=True,
|
||||||
|
task_time_limit=300, # 5 minutes
|
||||||
|
task_soft_time_limit=240, # 4 minutes
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@app.task(bind=True)
def test_task(self, message: str):
    """
    Test task for validating worker functionality.

    Simulates five seconds of work, publishing a PROGRESS state after each
    step, then returns a summary dict.

    Args:
        message: Test message to process

    Returns:
        dict: Task result with processing information
    """
    try:
        print(f"[WORKER] Starting test task with message: {message}")

        # Simulate work: five steps, one second each.
        step = 0
        while step < 5:
            print(f"[WORKER] Processing step {step + 1}/5...")
            time.sleep(1)

            # Publish intermediate progress so clients polling the result
            # backend can observe the task advancing.
            self.update_state(
                state="PROGRESS",
                meta={
                    "current": step + 1,
                    "total": 5,
                    "message": f"Processing step {step + 1}"
                }
            )
            step += 1

        outcome = {
            "status": "completed",
            "message": f"Successfully processed: {message}",
            "processed_at": time.time(),
            "worker_id": self.request.id
        }

        print(f"[WORKER] Test task completed successfully: {outcome}")
        return outcome

    except Exception as exc:
        print(f"[WORKER] Test task failed: {str(exc)}")
        # Re-queue with a 60s backoff, at most 3 attempts.
        raise self.retry(exc=exc, countdown=60, max_retries=3)
|
||||||
|
|
||||||
|
|
||||||
|
@app.task(bind=True)
def process_document_task(self, file_path: str):
    """
    Placeholder task for document processing.

    Args:
        file_path: Path to the document to process

    Returns:
        dict: Processing result
    """
    try:
        print(f"[WORKER] Starting document processing for: {file_path}")

        # Placeholder for document processing logic
        time.sleep(2)  # Simulate processing time

        processed = dict(
            status="completed",
            file_path=file_path,
            processed_at=time.time(),
            content=f"Placeholder content for {file_path}",
            worker_id=self.request.id,
        )

        print(f"[WORKER] Document processing completed: {file_path}")
        return processed

    except Exception as exc:
        print(f"[WORKER] Document processing failed for {file_path}: {str(exc)}")
        # Re-queue with a 60s backoff, at most 3 attempts.
        raise self.retry(exc=exc, countdown=60, max_retries=3)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
app.start()
|
||||||
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
Reference in New Issue
Block a user