Compare commits
13 Commits — Implementi... → AddingUser

| SHA1 |
|---|
| 264dac077c |
| 707507b128 |
| 477d6bf538 |
| 79bfae4ba8 |
| 8ae9754fde |
| bd52f2d296 |
| 62c7e46a88 |
| 06549c0d02 |
| f5e909463a |
| 78181e71be |
| 56dec3a619 |
| fc2e9e621e |
| 4de732b0ae |
.gitignore (vendored) — 160 lines changed

```diff
@@ -1,3 +1,5 @@
+volumes
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[codz]
@@ -213,4 +215,160 @@ marimo/_lsp/
 __marimo__/
 
 # Streamlit
 .streamlit/secrets.toml
+
+### react ###
+.DS_*
+*.log
+logs
+**/*.backup.*
+**/*.back.*
+
+node_modules
+bower_components
+
+*.sublime*
+
+psd
+thumb
+sketch
+
+### Node ###
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+lerna-debug.log*
+.pnpm-debug.log*
+
+# Diagnostic reports (https://nodejs.org/api/report.html)
+report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
+
+# Runtime data
+pids
+*.pid
+*.seed
+*.pid.lock
+
+# Directory for instrumented libs generated by jscoverage/JSCover
+lib-cov
+
+# Coverage directory used by tools like istanbul
+coverage
+*.lcov
+
+# nyc test coverage
+.nyc_output
+
+# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
+.grunt
+
+# Bower dependency directory (https://bower.io/)
+bower_components
+
+# node-waf configuration
+.lock-wscript
+
+# Compiled binary addons (https://nodejs.org/api/addons.html)
+build/Release
+
+# Dependency directories
+node_modules/
+jspm_packages/
+
+# Snowpack dependency directory (https://snowpack.dev/)
+web_modules/
+
+# TypeScript cache
+*.tsbuildinfo
+
+# Optional npm cache directory
+.npm
+
+# Optional eslint cache
+.eslintcache
+
+# Optional stylelint cache
+.stylelintcache
+
+# Microbundle cache
+.rpt2_cache/
+.rts2_cache_cjs/
+.rts2_cache_es/
+.rts2_cache_umd/
+
+# Optional REPL history
+.node_repl_history
+
+# Output of 'npm pack'
+*.tgz
+
+# Yarn Integrity file
+.yarn-integrity
+
+# dotenv environment variable files
+.env
+.env.development.local
+.env.test.local
+.env.production.local
+.env.local
+
+# parcel-bundler cache (https://parceljs.org/)
+.cache
+.parcel-cache
+
+# Next.js build output
+.next
+out
+
+# Nuxt.js build / generate output
+.nuxt
+dist
+
+# Gatsby files
+.cache/
+# Comment in the public line in if your project uses Gatsby and not Next.js
+# https://nextjs.org/blog/next-9-1#public-directory-support
+# public
+
+# vuepress build output
+.vuepress/dist
+
+# vuepress v2.x temp and cache directory
+.temp
+
+# Docusaurus cache and generated files
+.docusaurus
+
+# Serverless directories
+.serverless/
+
+# FuseBox cache
+.fusebox/
+
+# DynamoDB Local files
+.dynamodb/
+
+# TernJS port file
+.tern-port
+
+# Stores VSCode versions used for testing VSCode extensions
+.vscode-test
+
+# yarn v2
+.yarn/cache
+.yarn/unplugged
+.yarn/build-state.yml
+.yarn/install-state.gz
+.pnp.*
+
+### Node Patch ###
+# Serverless Webpack directories
+.webpack/
+
+# Optional stylelint cache
+
+# SvelteKit build / generate output
+.svelte-kit
```
Makefile — new file, 32 lines

```makefile
.PHONY: init up down restart logs clean

init:
	@echo "Creating directories and setting permissions..."
	@mkdir -p ./volumes/watched_files ./volumes/objects
	@chown -R 1002:1002 ./volumes/watched_files ./volumes/objects
	@echo "✓ Directories initialized"

up: init
	@echo "Starting services..."
	@docker-compose up -d
	@echo "✓ Services started"

down:
	@docker-compose down

restart:
	@docker-compose restart

logs:
	@docker-compose logs -f

clean: down
	@echo "Cleaning volumes..."
	@sudo rm -rf ./volumes
	@echo "✓ Volumes cleaned"

rebuild: clean init
	@echo "Rebuilding images..."
	@docker-compose build --no-cache
	@docker-compose up -d
	@echo "✓ Services rebuilt and started"
```
Readme.md — 425 lines changed

```diff
@@ -13,7 +13,7 @@ architecture with Redis for task queuing and MongoDB for data persistence.
 - **Backend API**: FastAPI (Python 3.12)
 - **Task Processing**: Celery with Redis broker
 - **Document Processing**: EasyOCR, PyMuPDF, python-docx, pdfplumber
-- **Database**: MongoDB
+- **Database**: MongoDB (pymongo)
 - **Frontend**: React
 - **Containerization**: Docker & Docker Compose
 - **File Monitoring**: Python watchdog library
@@ -95,25 +95,32 @@ MyDocManager/
 │   │   ├── requirements.txt
 │   │   ├── app/
 │   │   │   ├── main.py
-│   │   │   ├── file_watcher.py
+│   │   │   ├── file_watcher.py              # FileWatcher class with observer thread
-│   │   │   ├── celery_app.py
+│   │   │   ├── celery_app.py                # Celery Configuration
 │   │   │   ├── config/
 │   │   │   │   ├── __init__.py
 │   │   │   │   └── settings.py              # JWT, MongoDB config
 │   │   │   ├── models/
 │   │   │   │   ├── __init__.py
 │   │   │   │   ├── user.py                  # User Pydantic models
-│   │   │   │   └── auth.py                  # Auth Pydantic models
+│   │   │   │   ├── auth.py                  # Auth Pydantic models
+│   │   │   │   ├── document.py              # Document Pydantic models
+│   │   │   │   ├── job.py                   # Job Processing Pydantic models
+│   │   │   │   └── types.py                 # PyObjectId and other useful types
 │   │   │   ├── database/
 │   │   │   │   ├── __init__.py
-│   │   │   │   ├── connection.py            # MongoDB connection
+│   │   │   │   ├── connection.py            # MongoDB connection (pymongo)
 │   │   │   │   └── repositories/
 │   │   │   │       ├── __init__.py
-│   │   │   │       └── user_repository.py   # User CRUD operations
+│   │   │   │       ├── user_repository.py       # User CRUD operations (synchronous)
+│   │   │   │       ├── document_repository.py   # Document CRUD operations (synchronous)
+│   │   │   │       └── job_repository.py        # Job CRUD operations (synchronous)
 │   │   │   ├── services/
 │   │   │   │   ├── __init__.py
-│   │   │   │   ├── auth_service.py          # JWT & password logic
+│   │   │   │   ├── auth_service.py          # JWT & password logic (synchronous)
-│   │   │   │   ├── user_service.py          # User business logic
+│   │   │   │   ├── user_service.py          # User business logic (synchronous)
+│   │   │   │   ├── document_service.py      # Document business logic (synchronous)
+│   │   │   │   ├── job_service.py           # Job processing logic (synchronous)
 │   │   │   │   └── init_service.py          # Admin creation at startup
 │   │   │   ├── api/
 │   │   │   │   ├── __init__.py
@@ -125,7 +132,7 @@ MyDocManager/
 │   │   │   └── utils/
 │   │   │       ├── __init__.py
 │   │   │       ├── security.py              # Password utilities
-│   │   │       └── exceptions.py            # Custom exceptions
+│   │   │       └── document_matching.py     # Fuzzy matching Algorithms
 │   ├── worker/
 │   │   ├── Dockerfile
 │   │   ├── requirements.txt
@@ -133,7 +140,13 @@ MyDocManager/
 │   └── frontend/
 │       ├── Dockerfile
 │       ├── package.json
+│       ├── index.html
 │       └── src/
+│           ├── assets/
+│           ├── App.css
+│           ├── App.jsx
+│           ├── main.css
+│           └── main.jsx
 ├── tests/
 │   ├── file-processor/
 │   │   ├── test_auth/
```
````diff
@@ -224,78 +237,76 @@ On first startup, the application automatically creates a default admin user:
 
 #### Files Collection
 
-Stores file metadata and extracted content:
+Stores file metadata and extracted content using Pydantic models:
 
-```json
-{
-  "_id": "ObjectId",
-  "filename": "document.pdf",
-  "filepath": "/watched_files/document.pdf",
-  "file_type": "pdf",
-  "extraction_method": "direct_text",  // direct_text, ocr, hybrid
-  "metadata": {
-    "page_count": 15,       // for PDFs
-    "word_count": 250,      // for text files
-    "image_dimensions": {   // for images
-      "width": 1920,
-      "height": 1080
-    }
-  },
-  "detected_at": "2024-01-15T10:29:00Z",
-  "file_hash": "sha256_hash_value"
-}
-```
-
-#### Document Contents Collection
-
-Stores actual file content and technical metadata:
-
-```json
-{
-  "_id": "ObjectId",
-  "file_hash": "sha256_hash_value",
-  "content": "extracted text content...",
-  "encoding": "utf-8",
-  "file_size": 2048576,
-  "mime_type": "application/pdf"
-}
-```
+```python
+class FileDocument(BaseModel):
+    """
+    Model for file documents stored in the 'files' collection.
+
+    Represents a file detected in the watched directory with its
+    metadata and extracted content.
+    """
+
+    id: Optional[PyObjectId] = Field(default=None, alias="_id")
+    filename: str = Field(..., description="Original filename")
+    filepath: str = Field(..., description="Full path to the file")
+    file_type: FileType = Field(..., description="Type of the file")
+    extraction_method: Optional[ExtractionMethod] = Field(default=None, description="Method used to extract content")
+    metadata: Dict[str, Any] = Field(default_factory=dict, description="File-specific metadata")
+    detected_at: Optional[datetime] = Field(default=None, description="Timestamp when file was detected")
+    file_hash: Optional[str] = Field(default=None, description="SHA256 hash of file content")
+    encoding: str = Field(default="utf-8", description="Character encoding for text files")
+    file_size: int = Field(..., ge=0, description="File size in bytes")
+    mime_type: str = Field(..., description="MIME type detected")
+
+    @field_validator('filepath')
+    @classmethod
+    def validate_filepath(cls, v: str) -> str:
+        """Validate filepath format."""
+        if not v.strip():
+            raise ValueError("Filepath cannot be empty")
+        return v.strip()
+
+    @field_validator('filename')
+    @classmethod
+    def validate_filename(cls, v: str) -> str:
+        """Validate filename format."""
+        if not v.strip():
+            raise ValueError("Filename cannot be empty")
+        return v.strip()
+```
 
 #### Processing Jobs Collection
 
 Tracks processing status and lifecycle:
 
-```json
-{
-  "_id": "ObjectId",
-  "file_id": "reference_to_files_collection",
-  "status": "completed",
-  // pending, processing, completed, failed
-  "task_id": "celery_task_uuid",
-  "created_at": "2024-01-15T10:29:00Z",
-  "started_at": "2024-01-15T10:29:30Z",
-  "completed_at": "2024-01-15T10:30:00Z",
-  "error_message": null
-}
-```
+```python
+class ProcessingJob(BaseModel):
+    """
+    Model for processing jobs stored in the 'processing_jobs' collection.
+
+    Tracks the lifecycle and status of document processing tasks.
+    """
+
+    id: Optional[PyObjectId] = Field(default=None, alias="_id")
+    file_id: PyObjectId = Field(..., description="Reference to file document")
+    status: ProcessingStatus = Field(default=ProcessingStatus.PENDING, description="Current processing status")
+    task_id: Optional[str] = Field(default=None, description="Celery task UUID")
+    created_at: Optional[datetime] = Field(default=None, description="Timestamp when job was created")
+    started_at: Optional[datetime] = Field(default=None, description="Timestamp when processing started")
+    completed_at: Optional[datetime] = Field(default=None, description="Timestamp when processing completed")
+    error_message: Optional[str] = Field(default=None, description="Error message if processing failed")
+
+    @field_validator('error_message')
+    @classmethod
+    def validate_error_message(cls, v: Optional[str]) -> Optional[str]:
+        """Clean up error message."""
+        if v is not None:
+            return v.strip() if v.strip() else None
+        return v
+```
 
-### Data Storage Strategy
-
-- **Choice**: Three separate collections for files, content, and processing status
-- **Rationale**: Normalization prevents content duplication when multiple files have identical content
-- **Benefits**:
-  - Content deduplication via SHA256 hash
-  - Better query performance for metadata vs content searches
-  - Clear separation of concerns between file metadata, content, and processing lifecycle
-  - Multiple files can reference the same content (e.g., identical copies in different locations)
-
-### Content Storage Location
-
-- **Choice**: Store extracted content in separate `document_contents` collection
-- **Rationale**: Content normalization and deduplication
-- **Benefits**:
-  - Single content storage per unique file hash
-  - Multiple file entries can reference same content
-  - Efficient storage for duplicate files
-
 ### Supported File Types (Initial Implementation)
 
 - **Text Files** (`.txt`): Direct content reading
````
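To illustrate how the validators above behave at instantiation time, here is a minimal sketch; `FileType.TXT` is an assumed enum member consistent with the README's supported file types, not a name confirmed by this diff.

```python
# Instantiating FileDocument as defined above; FileType.TXT is an assumed
# enum member consistent with the README's supported file types.
doc = FileDocument(
    filename="  notes.txt  ",            # validator strips whitespace
    filepath="/watched_files/notes.txt",
    file_type=FileType.TXT,
    file_size=42,
    mime_type="text/plain",
)
assert doc.filename == "notes.txt"

try:
    FileDocument(filename="   ", filepath="/x", file_type=FileType.TXT,
                 file_size=0, mime_type="text/plain")
except ValueError as exc:  # pydantic's ValidationError subclasses ValueError
    print(exc)  # reports "Filename cannot be empty"
```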
```diff
@@ -306,7 +317,7 @@ Tracks processing status and lifecycle:
 
 #### Watchdog Implementation
 
-- **Choice**: Dedicated observer thread (Option A)
+- **Choice**: Dedicated observer thread
 - **Rationale**: Standard approach, clean separation of concerns
 - **Implementation**: Watchdog observer runs in separate thread from FastAPI
```
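A minimal sketch of the observer-thread choice: the watchdog `Observer` runs in its own thread, leaving FastAPI free to serve requests. The handler and function names here are illustrative assumptions, not the project's actual `file_watcher.py`.

```python
# Minimal sketch (assumed names, not the project's actual file_watcher.py):
# a watchdog Observer runs in its own thread alongside FastAPI.
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler


class WatchedFilesHandler(FileSystemEventHandler):
    """React to new files appearing in the watched directory."""

    def on_created(self, event):
        if not event.is_directory:
            # The real implementation would dispatch a Celery task here.
            print(f"New file detected: {event.src_path}")


def start_file_watcher(path: str = "/watched_files") -> Observer:
    """Start the observer in a dedicated thread and return it."""
    observer = Observer()
    observer.schedule(WatchedFilesHandler(), path, recursive=True)
    observer.start()  # spawns the observer thread
    return observer

# In FastAPI this would typically be called from a startup hook and
# stopped (observer.stop(); observer.join()) on shutdown.
```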
```diff
@@ -327,17 +338,86 @@ Tracks processing status and lifecycle:
 
 #### Content Storage Location
 
-- **Choice**: Store extracted content in `files` collection
-- **Rationale**: Content is intrinsic property of the file
-- **Benefits**: Single query to get file + content, simpler data model
+- **Choice**: Store files in the file system, using the SHA256 hash as filename
+- **Rationale**: MongoDB is not meant for large files, better performance. Files remain in the file system for easy
+  access.
 
-### Implementation Order
+#### Repository and Services Implementation
 
-1. ✅ Pydantic models for MongoDB collections
-2. ✅ Repository layer for data access (files + processing_jobs)
-3. ✅ Celery tasks for document processing
-4. ✅ Watchdog file monitoring implementation
-5. ✅ FastAPI integration and startup coordination
+- **Choice**: Synchronous implementation using pymongo
+- **Rationale**: Full compatibility with Celery workers and simplified workflow
+- **Implementation**: All repositories and services operate synchronously for seamless integration
```
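The hash-as-filename choice amounts to content-addressed storage; a minimal sketch follows. The `/objects` directory matches the compose volume mounts, but the helper names are assumptions, not the project's actual API.

```python
# Minimal sketch of content-addressed storage (helper names are assumed):
# the SHA256 hash of the bytes becomes the filename, so identical content
# is stored exactly once.
import hashlib
from pathlib import Path

OBJECTS_DIR = Path("/objects")  # matches the ./volumes/objects mount


def store_object(data: bytes) -> str:
    """Write bytes under their SHA256 hash and return the hash."""
    file_hash = hashlib.sha256(data).hexdigest()
    target = OBJECTS_DIR / file_hash
    if not target.exists():  # duplicate content is written only once
        target.write_bytes(data)
    return file_hash


def load_object(file_hash: str) -> bytes:
    """Read back the bytes for a given hash."""
    return (OBJECTS_DIR / file_hash).read_bytes()
```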
```diff
+## Job Management Layer
+
+### Repository Pattern Implementation
+
+The job management system follows the repository pattern for clean separation between data access and business logic.
+
+#### JobRepository
+
+Handles direct MongoDB operations for processing jobs using synchronous pymongo:
+
+**CRUD Operations:**
+- `create_job()` - Create new processing job with automatic `created_at` timestamp
+- `get_job_by_id()` - Retrieve job by ObjectId
+- `update_job_status()` - Update job status with automatic timestamp management
+- `delete_job()` - Remove job from database
+- `get_jobs_by_file_id()` - Get all jobs for specific file
+- `get_jobs_by_status()` - Get jobs filtered by processing status
+
+**Automatic Timestamp Management:**
+- `created_at`: Set automatically during job creation
+- `started_at`: Set automatically when status changes to PROCESSING
+- `completed_at`: Set automatically when status changes to COMPLETED or FAILED
```
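A sketch of how `update_job_status()` could apply those timestamps automatically; the behaviour matches the list above, but the function body and collection layout are assumptions.

```python
# Sketch of automatic timestamp management in update_job_status()
# (an assumed implementation, consistent with the behaviour described above).
from datetime import datetime, timezone

from bson import ObjectId
from pymongo.collection import Collection


def update_job_status(jobs: Collection, job_id: ObjectId, new_status: str) -> bool:
    """Update a job's status, stamping started_at/completed_at as needed."""
    update = {"status": new_status}
    now = datetime.now(timezone.utc)
    if new_status == "processing":
        update["started_at"] = now
    elif new_status in ("completed", "failed"):
        update["completed_at"] = now
    result = jobs.update_one({"_id": job_id}, {"$set": update})
    return result.modified_count == 1
```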
````diff
+#### JobService
+
+Provides synchronous business logic layer with strict status transition validation:
+
+**Status Transition Methods:**
+- `mark_job_as_started()` - PENDING → PROCESSING
+- `mark_job_as_completed()` - PROCESSING → COMPLETED
+- `mark_job_as_failed()` - PROCESSING → FAILED
+
+**Validation Rules:**
+- Strict status transitions (invalid transitions raise exceptions)
+- Job existence verification before any operation
+- Automatic timestamp management through repository layer
+
+#### Custom Exceptions
+
+**InvalidStatusTransitionError**: Raised for invalid status transitions
+**JobRepositoryError**: Raised for MongoDB operation failures
+
+#### Valid Status Transitions
+
+```
+PENDING → PROCESSING    (via mark_job_as_started)
+PROCESSING → COMPLETED  (via mark_job_as_completed)
+PROCESSING → FAILED     (via mark_job_as_failed)
+```
+
+All other transitions are forbidden and will raise `InvalidStatusTransitionError`.
````
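A sketch of how the service could enforce this transition table; the exception name and the PENDING/PROCESSING/COMPLETED/FAILED statuses come from the README, while the method body is an assumption.

```python
# Sketch of strict transition validation (assumed body; only the name
# InvalidStatusTransitionError and the statuses come from the README).
VALID_TRANSITIONS = {
    "pending": {"processing"},
    "processing": {"completed", "failed"},
}


class InvalidStatusTransitionError(Exception):
    """Raised when a job status change is not allowed."""


def validate_transition(current: str, new: str) -> None:
    """Raise unless current -> new is one of the allowed transitions."""
    if new not in VALID_TRANSITIONS.get(current, set()):
        raise InvalidStatusTransitionError(
            f"Cannot move job from {current} to {new}"
        )
```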
````diff
+### File Structure
+
+```
+src/file-processor/app/
+├── database/repositories/
+│   ├── job_repository.py        # JobRepository class (synchronous)
+│   ├── user_repository.py       # UserRepository class (synchronous)
+│   ├── document_repository.py   # DocumentRepository class (synchronous)
+│   └── file_repository.py       # FileRepository class (synchronous)
+├── services/
+│   ├── job_service.py           # JobService class (synchronous)
+│   ├── auth_service.py          # AuthService class (synchronous)
+│   ├── user_service.py          # UserService class (synchronous)
+│   └── document_service.py      # DocumentService class (synchronous)
+└── exceptions/
+    └── job_exceptions.py        # Custom exceptions
+```
 
 ### Processing Pipeline Features
````
````diff
@@ -346,87 +426,7 @@ Tracks processing status and lifecycle:
 - **Status Tracking**: Real-time processing status via `processing_jobs` collection
 - **Extensible Metadata**: Flexible metadata storage per file type
 - **Multiple Extraction Methods**: Support for direct text, OCR, and hybrid approaches
+- **Synchronous Operations**: All database operations use pymongo for Celery compatibility
-
-## Document Service Architecture
-
-### Service Overview
-
-The document service provides orchestrated access to file documents and their content through a single interface that coordinates between `FileDocument` and `DocumentContent` repositories.
-
-### Service Design
-
-- **Architecture Pattern**: Service orchestration with separate repositories
-- **Transaction Support**: MongoDB ACID transactions for data consistency
-- **Content Deduplication**: Multiple files can reference the same content via SHA256 hash
-- **Error Handling**: MongoDB standard exceptions with transaction rollback
-
-### Document Service (`document_service.py`)
-
-Orchestrates operations between file and content repositories while maintaining data consistency.
-
-#### Core Functionality
-
-##### `create_document(file_path: str, file_bytes: bytes, encoding: str)`
-
-Creates a new document with automatic attribute calculation and content deduplication.
-
-**Automatic Calculations:**
-- `file_hash`: SHA256 hash of file bytes
-- `file_type`: Detection based on file extension
-- `mime_type`: Detection via `python-magic` library
-- `file_size`: Length of provided bytes
-- `detected_at`: Current timestamp
-- `metadata`: Empty dictionary (reserved for future extension)
-
-**Deduplication Logic:**
-1. Calculate SHA256 hash of file content
-2. Check if `DocumentContent` with this hash already exists
-3. If EXISTS: Create only `FileDocument` referencing existing content
-4. If NOT EXISTS: Create both `FileDocument` and `DocumentContent` in transaction
-
-**Transaction Flow:**
-```
-BEGIN TRANSACTION
-  IF content_exists(file_hash):
-    CREATE FileDocument with content reference
-  ELSE:
-    CREATE DocumentContent
-    CREATE FileDocument with content reference
-COMMIT TRANSACTION
-```
-
-#### Available Methods
-
-- `create_document(file_path, file_bytes, encoding)`: Create with deduplication
-- `get_document_by_id(document_id)`: Retrieve by document ID
-- `get_document_by_hash(file_hash)`: Retrieve by file hash
-- `get_document_by_filepath(filepath)`: Retrieve by file path
-- `list_documents(skip, limit)`: Paginated document listing
-- `count_documents()`: Total document count
-- `update_document(document_id, update_data)`: Update document metadata
-- `delete_document(document_id)`: Remove document and orphaned content
-
-### Repository Dependencies
-
-The document service coordinates two existing repositories:
-
-#### File Repository (`file_repository.py`)
-- `create_document()`, `find_document_by_id()`, `find_document_by_hash()`
-- `find_document_by_filepath()`, `find_document_by_name()`
-- `list_documents()`, `count_documents()`
-- `update_document()`, `delete_document()`
-
-#### Document Content Repository (`document_content_repository.py`)
-- `create_document_content()`, `find_document_content_by_id()`
-- `find_document_content_by_file_hash()`, `content_exists()`
-- `update_document_content()`, `delete_document_content()`
-- `list_document_contents()`, `count_document_contents()`
-
-### Dependencies
-
-- `python-magic`: MIME type detection
-- `hashlib`: SHA256 hashing (standard library)
-- `pymongo`: MongoDB transactions support
 
 ## Key Implementation Notes
````
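The deduplication and transaction flow described in the section removed above is the essence of `create_document()`; a minimal pymongo sketch of that flow, with assumed collection names and without the project's repository plumbing:

```python
# Minimal sketch of create_document() deduplication (assumed collection
# names; the real service goes through repository classes).
import hashlib
from datetime import datetime, timezone

from pymongo import MongoClient

client = MongoClient("mongodb://localhost:27017")
db = client["mydocmanager"]


def create_document(file_path: str, file_bytes: bytes, encoding: str = "utf-8") -> str:
    """Insert a file document, reusing existing content when the hash matches."""
    file_hash = hashlib.sha256(file_bytes).hexdigest()
    with client.start_session() as session:
        with session.start_transaction():  # transactions require a replica set
            if db.document_contents.find_one({"file_hash": file_hash}, session=session) is None:
                db.document_contents.insert_one(
                    {"file_hash": file_hash, "encoding": encoding, "file_size": len(file_bytes)},
                    session=session,
                )
            result = db.files.insert_one(
                {
                    "filename": file_path.rsplit("/", 1)[-1],
                    "filepath": file_path,
                    "file_hash": file_hash,
                    "detected_at": datetime.now(timezone.utc),
                },
                session=session,
            )
    return str(result.inserted_id)
```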
```diff
@@ -449,6 +449,7 @@ The document service coordinates two existing repositories:
 - **Package Manager**: pip (standard)
 - **External Dependencies**: Listed in each service's requirements.txt
 - **Standard Library First**: Prefer standard library when possible
+- **Database Driver**: pymongo for synchronous MongoDB operations
 
 ### Testing Strategy
```
```diff
@@ -473,6 +474,7 @@ The document service coordinates two existing repositories:
 12. **Content in Files Collection**: Extracted content stored with file metadata
 13. **Direct Task Dispatch**: File watcher directly creates Celery tasks
 14. **SHA256 Duplicate Detection**: Prevents reprocessing identical files
+15. **Synchronous Implementation**: All repositories and services use pymongo for Celery compatibility
 
 ### Development Process Requirements
```
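Note 13 (direct task dispatch) could look roughly like this from the watcher side; the task name and arguments are assumptions, not the project's actual tasks module.

```python
# Sketch of direct task dispatch from the file watcher (assumed task name
# and arguments; the real task lives in src/worker/tasks).
from celery import Celery

celery_app = Celery("mydocmanager", broker="redis://redis:6379/0")


def on_new_file(filepath: str) -> None:
    """Queue a processing task as soon as the watcher sees a file."""
    # send_task dispatches by name, so the watcher does not need to
    # import the worker's code.
    celery_app.send_task("tasks.process_document", args=[filepath])
```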
````diff
@@ -483,21 +485,88 @@ The document service coordinates two existing repositories:
 
 ### Next Implementation Steps
 
-1. ✅ Create docker-compose.yml with all services => Done
-2. ✅ Define user management and authentication architecture => Done
-3. ✅ Implement user models and authentication services =>
-   1. models/user.py => Done
-   2. models/auth.py => Done
-   3. database/repositories/user_repository.py => Done
-4. ✅ Add automatic admin user creation if it does not exists => Done
-5. **IN PROGRESS**: Implement file processing pipeline =>
-   1. Create Pydantic models for files and processing_jobs collections
-   2. Implement repository layer for file and processing job data access
-   3. Create Celery tasks for document processing (.txt, .pdf, .docx)
-   4. Implement Watchdog file monitoring with dedicated observer
-   5. Integrate file watcher with FastAPI startup
-6. Create protected API routes for user management
-7. Build React monitoring interface with authentication
+1. Build React Login Page
+2. Build React Registration Page
+3. Build React Default Dashboard
+4. Build React User Management Pages
+
+#### Validated Folders and files
+```
+src/frontend/src/
+├── components/
+│   ├── auth/
+│   │   ├── LoginForm.jsx        # Login form component => Done
+│   │   └── AuthLayout.jsx       # Layout for the auth pages => Done
+│   └── common/
+│       ├── Header.jsx           # Common header => TODO
+│       ├── Layout.jsx           # Common header => TODO
+│       └── ProtectedRoutes.jsx  # Done
+├── contexts/
+│   └── AuthContext.jsx          # Done
+├── pages/
+│   ├── LoginPage.jsx            # Complete login page => Done
+│   └── DashboardPage.jsx        # Dashboard page (example) => TODO
+├── services/
+│   └── authService.js           # API service for auth => Done
+├── hooks/
+│   └── useAuth.js               # React hook for auth management => TODO
+├── utils/
+│   └── api.js                   # axios/fetch configuration => Done
+├── App.jsx                      # Needs to be updated => TODO
+```
+
+#### Choices already made
+
+* For handling API requests and authentication state, I propose
+  * axios (more features):
+    * Install axios for HTTP requests
+    * Interceptors for automatic token handling
+    * Centralized error handling
+* For authentication state management and navigation: Options A + C together
+  * Option A - React Context + React Router:
+    * React Context for global auth state (user, token, isAuthenticated)
+    * React Router for navigation between pages
+    * Automatic protected routes
+  * Option C - Context + localStorage for persistence:
+    * Token saved in localStorage to stay logged in
+    * Context reloaded on app startup
+* CSS: using daisyUI
+
+#### Package.json
+
+```json
+{
+  "name": "frontend",
+  "private": true,
+  "version": "0.0.0",
+  "type": "module",
+  "scripts": {
+    "dev": "vite",
+    "build": "vite build",
+    "lint": "eslint .",
+    "preview": "vite preview"
+  },
+  "dependencies": {
+    "@tailwindcss/vite": "^4.1.13",
+    "axios": "^1.12.2",
+    "react": "^19.1.1",
+    "react-dom": "^19.1.1",
+    "react-router-dom": "^7.9.3"
+  },
+  "devDependencies": {
+    "@eslint/js": "^9.33.0",
+    "@types/react": "^19.1.10",
+    "@types/react-dom": "^19.1.7",
+    "@vitejs/plugin-react": "^5.0.0",
+    "autoprefixer": "^10.4.21",
+    "daisyui": "^5.1.23",
+    "eslint": "^9.33.0",
+    "eslint-plugin-react-hooks": "^5.2.0",
+    "eslint-plugin-react-refresh": "^0.4.20",
+    "globals": "^16.3.0",
+    "postcss": "^8.5.6",
+    "tailwindcss": "^4.1.13",
+    "vite": "^7.1.2"
+  }
+}
+```
 
 ## Annexes
@@ -586,4 +655,4 @@ docker-compose up --scale worker=3
 - **file-processor**: Hot-reload enabled via `--reload` flag
 - Code changes in `src/file-processor/app/` automatically restart FastAPI
 - **worker**: No hot-reload (manual restart required for stability)
 - Code changes in `src/worker/tasks/` require: `docker-compose restart worker`
````
docker-compose.yml

```diff
@@ -19,7 +19,7 @@ services:
       MONGO_INITDB_ROOT_PASSWORD: password123
       MONGO_INITDB_DATABASE: mydocmanager
     volumes:
-      - mongodb-data:/data/db
+      - ./volumes/db:/data/db
     networks:
       - mydocmanager-network
 
@@ -34,10 +34,14 @@ services:
     environment:
       - REDIS_URL=redis://redis:6379/0
       - MONGODB_URL=mongodb://admin:password123@mongodb:27017/mydocmanager?authSource=admin
-      - PYTHONPATH=/app
+      - PYTHONPATH=/app:/tasks  # Added /tasks to Python path
     volumes:
       - ./src/file-processor:/app
+      - ./src/worker/tasks:/app/tasks  # <- Added: shared access to worker tasks
       - ./volumes/watched_files:/watched_files
+      - ./volumes/objects:/objects
+      - ./volumes/errors:/errors
+      - ./volumes/ignored:/ignored
     depends_on:
       - redis
       - mongodb
@@ -56,14 +60,32 @@ services:
       - MONGODB_URL=mongodb://admin:password123@mongodb:27017/mydocmanager?authSource=admin
       - PYTHONPATH=/app
     volumes:
-      - ./src/worker/tasks:/app
+      - ./src/worker:/app
+      - ./src/file-processor/app:/app/app  # <- Added: shared access file-processor app
       - ./volumes/watched_files:/watched_files
+      - ./volumes/objects:/objects
+      - ./volumes/errors:/errors
+      - ./volumes/ignored:/ignored
     depends_on:
      - redis
       - mongodb
     networks:
       - mydocmanager-network
-    command: celery -A main worker --loglevel=info
+    command: celery -A tasks.main worker --loglevel=info
+
+  # Frontend - React application with Vite
+  frontend:
+    build:
+      context: ./src/frontend
+      dockerfile: Dockerfile
+    container_name: mydocmanager-frontend
+    ports:
+      - "5173:5173"
+    volumes:
+      - ./src/frontend:/app
+      - /app/node_modules  # Anonymous volume to prevent node_modules override
+    networks:
+      - mydocmanager-network
 
 volumes:
   mongodb-data:
```
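With `./src/worker/tasks` mounted at `/app/tasks` and `PYTHONPATH=/app:/tasks`, the API container can import the worker's task signatures directly instead of dispatching by bare string name; a hedged sketch, where `tasks.main` and `process_document` are assumptions based on the compose command `celery -A tasks.main worker`:

```python
# Sketch: the shared mount lets the API container import worker task
# signatures. `tasks.main` and `process_document` are assumed names.
from tasks.main import process_document  # hypothetical task import


def queue_processing(filepath: str) -> str:
    """Queue the document-processing task and return its Celery id."""
    result = process_document.delay(filepath)
    return result.id
```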
package-lock.json (generated) — new file, 31 lines

```json
{
  "name": "MyDocManager",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "dependencies": {
        "react-icons": "^5.5.0"
      }
    },
    "node_modules/react": {
      "version": "19.1.1",
      "resolved": "https://registry.npmjs.org/react/-/react-19.1.1.tgz",
      "integrity": "sha512-w8nqGImo45dmMIfljjMwOGtbmC/mk4CMYhWIicdSflH91J9TyCyczcPFXJzrZ/ZXcgGRFeP6BU0BEJTw6tZdfQ==",
      "license": "MIT",
      "peer": true,
      "engines": {
        "node": ">=0.10.0"
      }
    },
    "node_modules/react-icons": {
      "version": "5.5.0",
      "resolved": "https://registry.npmjs.org/react-icons/-/react-icons-5.5.0.tgz",
      "integrity": "sha512-MEFcXdkP3dLo8uumGI5xN3lDFNsRtrjbOEKDLD7yv76v4wpnEq2Lt2qeHaQOr34I/wPN3s3+N08WkQ+CW37Xiw==",
      "license": "MIT",
      "peerDependencies": {
        "react": "*"
      }
    }
  }
}
```
package.json — new file, 5 lines

```json
{
  "dependencies": {
    "react-icons": "^5.5.0"
  }
}
```
requirements.txt

```diff
@@ -1,40 +1,66 @@
 amqp==5.3.1
 annotated-types==0.7.0
 anyio==4.10.0
+asgiref==3.9.1
 bcrypt==4.3.0
 billiard==4.2.1
 celery==5.5.3
+certifi==2025.8.3
+cffi==2.0.0
+charset-normalizer==3.4.3
 click==8.2.1
 click-didyoumean==0.3.1
 click-plugins==1.1.1.2
 click-repl==0.3.0
+cryptography==46.0.1
+Deprecated==1.2.18
 dnspython==2.8.0
+ecdsa==0.19.1
 email-validator==2.3.0
 fastapi==0.116.1
 h11==0.16.0
+hiredis==3.2.1
+httpcore==1.0.9
 httptools==0.6.4
+httpx==0.28.1
 idna==3.10
+importlib_metadata==8.7.0
 iniconfig==2.1.0
+izulu==0.50.0
 kombu==5.5.4
+lxml==6.0.2
 mongomock==4.3.0
 mongomock-motor==0.0.36
 motor==3.7.1
 packaging==25.0
+pikepdf==9.11.0
+pillow==11.3.0
 pipdeptree==2.28.0
 pluggy==1.6.0
 prompt_toolkit==3.0.52
+pyasn1==0.6.1
+pycparser==2.23
+pycron==3.2.0
 pydantic==2.11.9
 pydantic_core==2.33.2
 Pygments==2.19.2
+PyJWT==2.10.1
 pymongo==4.15.1
+PyMuPDF==1.26.4
+pypandoc==1.15
 pytest==8.4.2
 pytest-asyncio==1.2.0
 pytest-mock==3.15.1
 python-dateutil==2.9.0.post0
+python-docx==1.2.0
 python-dotenv==1.1.1
 python-magic==0.4.27
+python-multipart==0.0.20
 pytz==2025.2
 PyYAML==6.0.2
+redis==6.4.0
+reportlab==4.4.4
+rsa==4.9.1
 sentinels==1.1.1
 six==1.17.0
 sniffio==1.3.1
@@ -45,6 +71,9 @@ tzdata==2025.2
 uvicorn==0.35.0
 uvloop==0.21.0
 vine==5.1.0
+watchdog==6.0.0
 watchfiles==1.1.0
 wcwidth==0.2.13
 websockets==15.0.1
+wrapt==1.17.3
+zipp==3.23.0
```
Dockerfile

```diff
@@ -3,10 +3,23 @@ FROM python:3.12-slim
 # Set working directory
 WORKDIR /app
 
+# Install libmagic
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    libmagic1 \
+    file \
+    pandoc \
+    ghostscript \
+    texlive-xetex \
+    && rm -rf /var/lib/apt/lists/*
+
 # Copy requirements and install dependencies
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 
+# Change the user
+USER 1002:1002
+
 # Copy application code
 COPY . .
 
@@ -15,5 +28,6 @@ ENV PYTHONPATH=/app
 # Expose port
 EXPOSE 8000
 
+
 # Command will be overridden by docker-compose
 CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
```
src/file-processor/app/api/__init__.py — new file, empty

src/file-processor/app/api/dependencies.py — new file, 107 lines

```python
import jwt
from fastapi import Depends, HTTPException
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from jwt import InvalidTokenError
from starlette import status

from app.config import settings
from app.database.connection import get_database
from app.models.auth import UserRole
from app.models.user import UserInDB
from app.services.auth_service import AuthService
from app.services.document_service import DocumentService
from app.services.user_service import UserService

security = HTTPBearer()


def get_auth_service() -> AuthService:
    """Dependency to get AuthService instance."""
    return AuthService()


def get_user_service() -> UserService:
    """Dependency to get UserService instance."""
    database = get_database()
    return UserService(database)


def get_document_service() -> DocumentService:
    """Dependency to get DocumentService instance."""
    database = get_database()
    return DocumentService(database)


def get_current_user(
    credentials: HTTPAuthorizationCredentials = Depends(security),
    user_service: UserService = Depends(get_user_service)
) -> UserInDB:
    """
    Dependency to get current authenticated user from JWT token.

    Args:
        credentials: HTTP Bearer credentials
        user_service: User service instance

    Returns:
        User: Current authenticated user

    Raises:
        HTTPException: If token is invalid or user not found
    """
    try:
        payload = jwt.decode(
            credentials.credentials,
            settings.get_jwt_secret_key(),
            algorithms=[settings.get_jwt_algorithm()]
        )
        username: str = payload.get("sub")
        if username is None:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Could not validate credentials",
                headers={"WWW-Authenticate": "Bearer"},
            )
    except InvalidTokenError:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Could not validate credentials",
            headers={"WWW-Authenticate": "Bearer"},
        )

    user = user_service.get_user_by_username(username)
    if user is None:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Could not validate credentials",
            headers={"WWW-Authenticate": "Bearer"},
        )

    if not user.is_active:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Inactive user"
        )

    return user


def get_admin_user(current_user: UserInDB = Depends(get_current_user)) -> UserInDB:
    """
    Dependency to ensure current user has admin role.

    Args:
        current_user: Current authenticated user

    Returns:
        User: Current user if admin

    Raises:
        HTTPException: If user is not admin
    """
    if current_user.role != UserRole.ADMIN:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Not enough permissions"
        )
    return current_user
```
src/file-processor/app/api/routes/__init__.py — new file, empty

src/file-processor/app/api/routes/auth.py — new file, 80 lines

```python
from fastapi import APIRouter, Depends, HTTPException, status
from fastapi.security import OAuth2PasswordRequestForm

from app.api.dependencies import get_auth_service, get_current_user, get_user_service
from app.models.auth import LoginResponse, UserResponse
from app.models.user import UserInDB
from app.services.auth_service import AuthService
from app.services.user_service import UserService

router = APIRouter(tags=["authentication"])


@router.post("/login", response_model=LoginResponse)
def login(
    form_data: OAuth2PasswordRequestForm = Depends(),
    auth_service: AuthService = Depends(get_auth_service),
    user_service: UserService = Depends(get_user_service)
):
    """
    Authenticate user and return JWT token.

    Args:
        form_data: OAuth2 password form data
        auth_service: Auth service instance
        user_service: User service instance

    Returns:
        LoginResponse: JWT token and user info

    Raises:
        HTTPException: If authentication fails
    """
    incorrect_username_or_pwd = HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Incorrect username or password",
        headers={"WWW-Authenticate": "Bearer"},
    )

    user = user_service.get_user_by_username(form_data.username)
    if (not user or
            not user.is_active or
            not auth_service.verify_user_password(form_data.password, user.hashed_password)):
        raise incorrect_username_or_pwd

    access_token = auth_service.create_access_token(data={"sub": user.username})

    return LoginResponse(
        access_token=access_token,
        user=UserResponse(
            _id=user.id,
            username=user.username,
            email=user.email,
            role=user.role,
            is_active=user.is_active,
            created_at=user.created_at,
            updated_at=user.updated_at
        )
    )


@router.get("/me", response_model=UserResponse)
def get_current_user_profile(current_user: UserInDB = Depends(get_current_user)):
    """
    Get current user profile.

    Args:
        current_user: Current authenticated user

    Returns:
        UserResponse: Current user profile without sensitive data
    """
    return UserResponse(
        _id=current_user.id,
        username=current_user.username,
        email=current_user.email,
        role=current_user.role,
        is_active=current_user.is_active,
        created_at=current_user.created_at,
        updated_at=current_user.updated_at
    )
```
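A quick way to exercise these endpoints from Python; the base URL, the `/api/auth` route prefix, and the credentials are assumptions, since the router mounting is not part of this diff.

```python
# Hedged client-side sketch for the /login and /me routes above.
# Base URL, route prefix, and credentials are assumptions: the router
# mounting (app.include_router) is not shown in this diff.
import requests

BASE = "http://localhost:8000/api/auth"

# /login expects an OAuth2 password form, i.e. form-encoded fields.
resp = requests.post(BASE + "/login", data={"username": "admin", "password": "secret"})
resp.raise_for_status()
token = resp.json()["access_token"]

# /me requires the bearer token issued above.
me = requests.get(BASE + "/me", headers={"Authorization": f"Bearer {token}"})
print(me.json())
```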
243
src/file-processor/app/api/routes/document.py
Normal file
243
src/file-processor/app/api/routes/document.py
Normal file
@@ -0,0 +1,243 @@
|
|||||||
|
"""
|
||||||
|
Document API routes.
|
||||||
|
|
||||||
|
This module provides REST endpoints for document management operations.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
import fitz # PyMuPDF
|
||||||
|
from fastapi import APIRouter, Depends, HTTPException, Query, status, Path
|
||||||
|
from starlette.responses import Response
|
||||||
|
|
||||||
|
from app.api.dependencies import get_document_service, get_current_user
|
||||||
|
from app.models.document import DocumentResponse, FileDocument
|
||||||
|
from app.models.user import UserInDB
|
||||||
|
from app.services.document_service import DocumentService
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
router = APIRouter(tags=["Documents"])
|
||||||
|
|
||||||
|
|
||||||
|
def _count_pdf_pages(pdf_file_path: str) -> int:
|
||||||
|
"""
|
||||||
|
Count the number of pages in a PDF file using PyMuPDF.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
pdf_file_path: Path to the PDF file
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Number of pages in the PDF, or 0 if file cannot be read
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
with fitz.open(pdf_file_path) as doc:
|
||||||
|
return doc.page_count
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Could not count pages for PDF {pdf_file_path}: {e}")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def _build_object_url(file_hash: Optional[str]) -> Optional[str]:
|
||||||
|
"""
|
||||||
|
Build object URL from file hash.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
file_hash: SHA256 hash of the file
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
URL string or None if hash is not provided
|
||||||
|
"""
|
||||||
|
if not file_hash:
|
||||||
|
return None
|
||||||
|
return f"/api/objects/{file_hash}"
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_metadata_field(metadata: dict, field_name: str) -> List[str]:
|
||||||
|
"""
|
||||||
|
Extract a list field from metadata dictionary.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
metadata: Document metadata dictionary
|
||||||
|
field_name: Name of the field to extract
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of strings, empty list if field doesn't exist or is not a list
|
||||||
|
"""
|
||||||
|
field_value = metadata.get(field_name, [])
|
||||||
|
if isinstance(field_value, list):
|
||||||
|
return [str(item) for item in field_value]
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
def _map_file_document_to_response(
|
||||||
|
document: FileDocument,
|
||||||
|
document_service: DocumentService
|
||||||
|
) -> DocumentResponse:
|
||||||
|
"""
|
||||||
|
Map FileDocument to DocumentResponse format.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
document: FileDocument instance from database
|
||||||
|
document_service: Document service for file operations
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
DocumentResponse instance ready for API response
|
||||||
|
"""
|
||||||
|
# Calculate page count for PDF files
|
||||||
|
page_count = 0
|
||||||
|
if document.pdf_file_hash and document_service.exists(document.pdf_file_hash):
|
||||||
|
pdf_path = document_service.get_document_path(document.pdf_file_hash)
|
||||||
|
page_count = _count_pdf_pages(pdf_path)
|
||||||
|
|
||||||
|
# Build URLs
|
||||||
|
thumbnail_url = _build_object_url(document.thumbnail_file_hash)
|
||||||
|
pdf_url = _build_object_url(document.pdf_file_hash)
|
||||||
|
|
||||||
|
# Extract tags and categories from metadata
|
||||||
|
tags = _extract_metadata_field(document.metadata, "tags")
|
||||||
|
categories = _extract_metadata_field(document.metadata, "categories")
|
||||||
|
|
||||||
|
# Format created_at timestamp
|
||||||
|
created_at = document.detected_at.isoformat() if document.detected_at else ""
|
||||||
|
|
||||||
|
as_dict = {
|
||||||
|
"id": str(document.id),
|
||||||
|
"name": document.filename,
|
||||||
|
"original_file_type": document.file_type.value.upper(),
|
||||||
|
"created_at": created_at,
|
||||||
|
"file_size": document.file_size,
|
||||||
|
"page_count": page_count,
|
||||||
|
"thumbnail_url": thumbnail_url,
|
||||||
|
"pdf_url": pdf_url,
|
||||||
|
"tags": tags,
|
||||||
|
"categories": categories
|
||||||
|
}
|
||||||
|
logger.info(f"Document: {as_dict}")
|
||||||
|
|
||||||
|
return DocumentResponse(**as_dict)
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/documents", response_model=List[DocumentResponse])
|
||||||
|
def list_documents(
|
||||||
|
skip: int = Query(0, ge=0, description="Number of documents to skip"),
|
||||||
|
limit: int = Query(100, ge=1, le=1000, description="Maximum number of documents to return"),
|
||||||
|
user: UserInDB = Depends(get_current_user),
|
||||||
|
document_service: DocumentService = Depends(get_document_service)
|
||||||
|
) -> List[DocumentResponse]:
|
||||||
|
"""
|
||||||
|
Retrieve a paginated list of documents.
|
    Args:
        skip: Number of documents to skip for pagination
        limit: Maximum number of documents to return
        document_service: Document service instance

    Returns:
        List of documents in API response format

    Raises:
        HTTPException: If database operation fails
    """
    try:
        # Get documents from service
        documents = document_service.list_documents(skip=skip, limit=limit)

        # Map to response format
        document_responses = [
            _map_file_document_to_response(doc, document_service)
            for doc in documents
        ]

        return document_responses

    except Exception as e:
        logger.error(f"Failed to list documents: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to retrieve documents"
        )


@router.get("/objects/{file_hash}")
async def get_object_by_hash(
    file_hash: str = Path(..., description="SHA256 hash of the object to retrieve"),
    document_service: DocumentService = Depends(get_document_service),
    user: UserInDB = Depends(get_current_user),
):
    """
    Serve object content by its hash.

    This endpoint serves files (original documents, PDFs, thumbnails) by their
    SHA256 hash. It supports all file types stored in the objects folder.

    Args:
        file_hash: SHA256 hash of the object
        document_service: Document service dependency

    Returns:
        Response with the requested object content

    Raises:
        HTTPException: If object not found (404) or server error (500)
    """
    try:
        # Check if object exists
        if not document_service.exists(file_hash):
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Object not found"
            )

        # Get file path
        file_path = document_service.get_document_path(file_hash)

        # Verify file exists on disk
        if not os.path.exists(file_path):
            logger.error(f"Object {file_hash} registered but file not found at {file_path}")
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Object file not found on disk"
            )

        # Determine media type based on file content
        try:
            file_content = document_service.get_document_content_by_hash(file_hash)
            if not file_content:
                raise HTTPException(
                    status_code=status.HTTP_404_NOT_FOUND,
                    detail="Object content not available"
                )

            # Detect MIME type
            import magic
            mime_type = magic.from_buffer(file_content, mime=True)

            # Return file content with appropriate headers
            return Response(
                content=file_content,
                media_type=mime_type,
                headers={
                    "Content-Length": str(len(file_content)),
                    "Cache-Control": "public, max-age=3600"  # Cache for 1 hour
                }
            )

        except HTTPException:
            # Propagate the 404s raised above instead of masking them as 500s
            raise
        except Exception as e:
            logger.error(f"Error reading object content for hash {file_hash}: {str(e)}")
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="Failed to read object content"
            )

    except HTTPException:
        # Re-raise HTTP exceptions as-is
        raise
    except Exception as e:
        logger.error(f"Unexpected error serving object {file_hash}: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Internal server error while serving object"
        )
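For reference, a minimal client-side sketch of calling this endpoint (the base URL and bearer-token handling are assumptions; the route path and auth dependency come from the code above):

import requests

BASE_URL = "http://localhost:8000"  # assumed deployment address
file_hash = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"

# get_current_user guards the route, so an auth token is assumed here.
resp = requests.get(
    f"{BASE_URL}/objects/{file_hash}",
    headers={"Authorization": "Bearer <token>"},
)
resp.raise_for_status()
print(resp.headers["Content-Type"], len(resp.content))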
172  src/file-processor/app/api/routes/users.py  (new file)
@@ -0,0 +1,172 @@
from fastapi import APIRouter, Depends, HTTPException
from starlette import status

from app.api.dependencies import get_admin_user, get_user_service
from app.models.auth import UserResponse, MessageResponse
from app.models.types import PyObjectId
from app.models.user import UserInDB, UserCreate, UserUpdate
from app.services.user_service import UserService

router = APIRouter(tags=["users"])


@router.get("", response_model=list[UserInDB])
def list_users(
    admin_user: UserInDB = Depends(get_admin_user),
    user_service: UserService = Depends(get_user_service)
):
    """
    List all users (admin only).

    Args:
        admin_user: Current admin user
        user_service: User service instance

    Returns:
        list[UserInDB]: List of all users
    """
    return user_service.list_users()


@router.get("/{user_id}", response_model=UserResponse)
def get_user_by_id(
    user_id: PyObjectId,
    admin_user: UserInDB = Depends(get_admin_user),
    user_service: UserService = Depends(get_user_service)
):
    """
    Get specific user by ID (admin only).

    Args:
        user_id: User ID to retrieve
        admin_user: Current admin user
        user_service: User service instance

    Returns:
        UserResponse: User information without sensitive data

    Raises:
        HTTPException: If user not found
    """
    user = user_service.get_user_by_id(str(user_id))
    if not user:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="User not found"
        )

    return user


@router.post("", response_model=UserResponse, status_code=status.HTTP_201_CREATED)
def create_user(
    user_data: UserCreate,
    admin_user: UserInDB = Depends(get_admin_user),
    user_service: UserService = Depends(get_user_service)
):
    """
    Create new user (admin only).

    Args:
        user_data: User creation data
        admin_user: Current admin user
        user_service: User service instance

    Returns:
        UserResponse: Created user information without sensitive data

    Raises:
        HTTPException: If user creation fails
    """
    try:
        user = user_service.create_user(user_data)
        return UserResponse(
            _id=user.id,
            username=user.username,
            email=user.email,
            role=user.role,
            is_active=user.is_active,
            created_at=user.created_at,
            updated_at=user.updated_at
        )
    except ValueError as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(e)
        )


@router.put("/{user_id}", response_model=UserResponse)
def update_user(
    user_id: PyObjectId,
    user_data: UserUpdate,
    admin_user: UserInDB = Depends(get_admin_user),
    user_service: UserService = Depends(get_user_service)
):
    """
    Update existing user (admin only).

    Args:
        user_id: User ID to update
        user_data: User update data
        admin_user: Current admin user
        user_service: User service instance

    Returns:
        UserResponse: Updated user information without sensitive data

    Raises:
        HTTPException: If user not found or update fails
    """
    try:
        user = user_service.update_user(str(user_id), user_data)
        if not user:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="User not found"
            )

        return UserResponse(
            _id=user.id,
            username=user.username,
            email=user.email,
            role=user.role,
            is_active=user.is_active,
            created_at=user.created_at,
            updated_at=user.updated_at
        )
    except ValueError as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(e)
        )


@router.delete("/{user_id}", response_model=MessageResponse)
def delete_user(
    user_id: PyObjectId,
    admin_user: UserInDB = Depends(get_admin_user),
    user_service: UserService = Depends(get_user_service)
):
    """
    Delete user by ID (admin only).

    Args:
        user_id: User ID to delete
        admin_user: Current admin user
        user_service: User service instance

    Returns:
        MessageResponse: Success message

    Raises:
        HTTPException: If user not found or deletion fails
    """
    success = user_service.delete_user(str(user_id))
    if not success:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="User not found"
        )

    return MessageResponse(message="User successfully deleted")
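A minimal sketch of how this router would be mounted on the application (the FastAPI app module and the "/users" prefix are assumptions; the file itself only declares `router` with relative paths):

from fastapi import FastAPI

from app.api.routes.users import router as users_router

app = FastAPI()
# The router is created with APIRouter(tags=["users"]) and relative paths
# ("" and "/{user_id}"), so the mount prefix decides the final URLs.
app.include_router(users_router, prefix="/users")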
@@ -6,7 +6,6 @@ using simple os.getenv() approach without external validation libraries.
 """
 
 import os
-from typing import Optional
 
 
 def get_mongodb_url() -> str:
@@ -31,6 +30,10 @@ def get_mongodb_database_name() -> str:
     return os.getenv("MONGODB_DATABASE", "mydocmanager")
 
 
+def get_redis_url() -> str:
+    return os.getenv("REDIS_URL", "redis://localhost:6379/0")
+
+
 def get_jwt_secret_key() -> str:
     """
     Get JWT secret key from environment variables.
@@ -82,4 +85,34 @@ def is_development_environment() -> bool:
     Returns:
         bool: True if development environment
     """
     return os.getenv("ENVIRONMENT", "development").lower() == "development"
+
+
+def get_objects_folder() -> str:
+    """
+    Get objects folder path from environment variables.
+
+    Returns:
+        str: Objects folder path
+    """
+    return os.getenv("OBJECTS_FOLDER", "/objects")
+
+
+def get_watch_folder() -> str:
+    """Directory to monitor for new files"""
+    return os.getenv("WATCH_DIRECTORY", "/watched_files")
+
+
+def get_temp_folder() -> str:
+    """Directory to store temporary files"""
+    return os.getenv("TEMP_DIRECTORY", "/tmp")
+
+
+def get_errors_folder() -> str:
+    """Directory to store files that failed processing"""
+    return os.getenv("ERRORS_DIRECTORY", "/errors")
+
+
+def get_ignored_folder() -> str:
+    """Directory to store ignored files"""
+    return os.getenv("IGNORED_DIRECTORY", "/ignored")
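As a quick usage sketch, each getter simply wraps os.getenv with the default shown above (the assertions assume no environment overrides are set):

from app.config.settings import get_redis_url, get_objects_folder, get_watch_folder

# With no overrides in the environment, the defaults baked into settings.py apply.
assert get_redis_url() == "redis://localhost:6379/0"
assert get_objects_folder() == "/objects"
assert get_watch_folder() == "/watched_files"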
@@ -4,19 +4,23 @@ MongoDB database connection management.
 This module handles MongoDB connection with fail-fast approach.
 The application will terminate if MongoDB is not accessible at startup.
 """
+import logging
 import sys
 from typing import Optional
 
 from pymongo import MongoClient
 from pymongo.database import Database
 from pymongo.errors import ConnectionFailure, ServerSelectionTimeoutError
 
 from app.config.settings import get_mongodb_url, get_mongodb_database_name
+from app.utils.security import safe_connection_string
 
 # Global variables for singleton pattern
 _client: Optional[MongoClient] = None
 _database: Optional[Database] = None
 
+logger = logging.getLogger(__name__)
+
 
 def create_mongodb_client() -> MongoClient:
     """
@@ -42,16 +46,16 @@ def create_mongodb_client() -> MongoClient:
         # Test connection by running admin command
         client.admin.command('ping')
 
-        print(f"Successfully connected to MongoDB at {mongodb_url}")
+        logger.info(f"Successfully connected to MongoDB at {safe_connection_string(mongodb_url)}")
         return client
 
     except (ConnectionFailure, ServerSelectionTimeoutError) as e:
-        print(f"ERROR: Failed to connect to MongoDB at {mongodb_url}")
+        logger.error(f"ERROR: Failed to connect to MongoDB at {safe_connection_string(mongodb_url)}")
-        print(f"Connection error: {str(e)}")
+        logger.error(f"Connection error: {str(e)}")
-        print("MongoDB is required for this application. Please ensure MongoDB is running and accessible.")
+        logger.error("MongoDB is required for this application. Please ensure MongoDB is running and accessible.")
         sys.exit(1)
     except Exception as e:
-        print(f"ERROR: Unexpected error connecting to MongoDB: {str(e)}")
+        logger.error(f"ERROR: Unexpected error connecting to MongoDB: {str(e)}")
         sys.exit(1)
 
 
@@ -73,7 +77,7 @@ def get_database() -> Database:
 
         database_name = get_mongodb_database_name()
         _database = _client[database_name]
-        print(f"Connected to database: {database_name}")
+        logger.info(f"Connected to database: {database_name}")
 
     return _database
 
@@ -91,7 +95,7 @@ def close_database_connection():
         _client.close()
         _client = None
         _database = None
-        print("MongoDB connection closed")
+        logger.info("MongoDB connection closed")
 
 
 def get_mongodb_client() -> Optional[MongoClient]:
@@ -107,6 +111,15 @@ def get_mongodb_client() -> Optional[MongoClient]:
     return _client
 
 
+def get_extra_args(session):
+    # Build kwargs only if session is provided
+    kwargs = {}
+    if session is not None:
+        kwargs["session"] = session
+
+    return kwargs
+
+
 def test_database_connection() -> bool:
     """
     Test if database connection is working.
@@ -122,4 +135,4 @@ def test_database_connection() -> bool:
         db.command('ping')
         return True
     except Exception:
         return False
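For context, a minimal sketch of how `get_extra_args` keeps collection calls session-agnostic (the transaction wiring and connection URL are assumptions; the helper itself is defined above):

from pymongo import MongoClient

from app.database.connection import get_extra_args

client = MongoClient("mongodb://localhost:27017")  # assumed URL
collection = client["mydocmanager"].documents


def insert_with_optional_session(doc: dict, session=None):
    # With session=None this expands to insert_one(doc); inside a
    # transaction it becomes insert_one(doc, session=session).
    return collection.insert_one(doc, **get_extra_args(session))


# Standalone call:
insert_with_optional_session({"filename": "a.txt"})

# Transactional call (transactions require a replica set):
with client.start_session() as session:
    with session.start_transaction():
        insert_with_optional_session({"filename": "b.txt"}, session=session)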
@@ -1,214 +0,0 @@
from typing import List, Optional
from datetime import datetime
from motor.motor_asyncio import AsyncIOMotorDatabase, AsyncIOMotorCollection
from pymongo.errors import DuplicateKeyError, PyMongoError
from bson import ObjectId

from app.models.document import DocumentContent


class DocumentContentRepository:
    """
    Repository class for document content CRUD operations in MongoDB.

    This class handles all database operations related to document content,
    following the repository pattern with dependency injection and async/await.
    """

    def __init__(self, database: AsyncIOMotorDatabase):
        """
        Initialize repository with database dependency.

        Args:
            database (AsyncIOMotorDatabase): MongoDB database instance
        """
        self.db = database
        self.collection: AsyncIOMotorCollection = database.document_contents
        self._ensure_indexes()

    async def initialize(self):
        """
        Initialize repository by ensuring required indexes exist.

        Should be called after repository instantiation to setup database indexes.
        """
        await self._ensure_indexes()

    async def _ensure_indexes(self):
        """
        Ensure required database indexes exist.

        Creates unique index on file_hash field to prevent duplicates.
        """
        try:
            await self.collection.create_index("file_hash", unique=True)
        except PyMongoError:
            # Index might already exist, ignore error
            pass

    async def create_document_content(self, document_content: DocumentContent) -> DocumentContent:
        """
        Create a new document content in the database.

        Args:
            document_content (DocumentContent): Document content data

        Returns:
            DocumentContent: Created document content with database ID

        Raises:
            DuplicateKeyError: If file_hash already exists
            ValueError: If document content creation fails due to validation
        """
        document_dict = document_content.model_dump(by_alias=True, exclude_unset=True)

        # Remove _id if it's None to let MongoDB generate it
        if document_dict.get("_id") is None:
            document_dict.pop("_id", None)

        try:
            result = await self.collection.insert_one(document_dict)
            document_dict["_id"] = result.inserted_id
            return DocumentContent(**document_dict)
        except DuplicateKeyError as e:
            raise DuplicateKeyError(f"Document content with file_hash '{document_content.file_hash}' already exists: {e}")
        except PyMongoError as e:
            raise ValueError(f"Failed to create document content: {e}")

    async def find_document_content_by_id(self, document_id: str) -> Optional[DocumentContent]:
        """
        Find document content by ID.

        Args:
            document_id (str): Document content ID to search for

        Returns:
            DocumentContent or None: Document content if found, None otherwise
        """
        try:
            if not ObjectId.is_valid(document_id):
                return None

            document_doc = await self.collection.find_one({"_id": ObjectId(document_id)})
            if document_doc:
                return DocumentContent(**document_doc)
            return None
        except PyMongoError:
            return None

    async def find_document_content_by_file_hash(self, file_hash: str) -> Optional[DocumentContent]:
        """
        Find document content by file hash.

        Args:
            file_hash (str): File hash to search for

        Returns:
            DocumentContent or None: Document content if found, None otherwise
        """
        try:
            document_doc = await self.collection.find_one({"file_hash": file_hash})
            if document_doc:
                return DocumentContent(**document_doc)
            return None
        except PyMongoError:
            return None

    async def content_exists(self, file_hash: str) -> bool:
        """
        Check if document content exists by file hash.

        Args:
            file_hash (str): File hash to check

        Returns:
            bool: True if document content exists, False otherwise
        """
        try:
            count = await self.collection.count_documents({"file_hash": file_hash})
            return count > 0
        except PyMongoError:
            return False

    async def update_document_content(self, document_id: str, update_data: dict) -> Optional[DocumentContent]:
        """
        Update document content information.

        Args:
            document_id (str): Document content ID to update
            update_data (dict): Updated document content data

        Returns:
            DocumentContent or None: Updated document content if found, None otherwise
        """
        try:
            if not ObjectId.is_valid(document_id):
                return None

            # Remove None values and _id from update data
            clean_update_data = {k: v for k, v in update_data.items() if v is not None and k != "_id"}

            if not clean_update_data:
                return await self.find_document_content_by_id(document_id)

            result = await self.collection.find_one_and_update(
                {"_id": ObjectId(document_id)},
                {"$set": clean_update_data},
                return_document=True
            )

            if result:
                return DocumentContent(**result)
            return None

        except PyMongoError:
            return None

    async def delete_document_content(self, document_id: str) -> bool:
        """
        Delete document content from database.

        Args:
            document_id (str): Document content ID to delete

        Returns:
            bool: True if document content was deleted, False otherwise
        """
        try:
            if not ObjectId.is_valid(document_id):
                return False

            result = await self.collection.delete_one({"_id": ObjectId(document_id)})
            return result.deleted_count > 0
        except PyMongoError:
            return False

    async def list_document_contents(self, skip: int = 0, limit: int = 100) -> List[DocumentContent]:
        """
        List document contents with pagination.

        Args:
            skip (int): Number of document contents to skip (default: 0)
            limit (int): Maximum number of document contents to return (default: 100)

        Returns:
            List[DocumentContent]: List of document contents
        """
        try:
            cursor = self.collection.find({}).skip(skip).limit(limit).sort("_id", -1)
            document_docs = await cursor.to_list(length=limit)
            return [DocumentContent(**document_doc) for document_doc in document_docs]
        except PyMongoError:
            return []

    async def count_document_contents(self) -> int:
        """
        Count total number of document contents.

        Returns:
            int: Total number of document contents in database
        """
        try:
            return await self.collection.count_documents({})
        except PyMongoError:
            return 0
@@ -6,9 +6,13 @@ in MongoDB with proper error handling and type safety.
 """
 
 from typing import Optional, List
 
 from bson import ObjectId
+from pymongo.collection import Collection
+from pymongo.database import Database
 from pymongo.errors import DuplicateKeyError, PyMongoError
-from motor.motor_asyncio import AsyncIOMotorCollection, AsyncIOMotorDatabase
 
+from app.database.connection import get_extra_args
 from app.models.document import FileDocument
 from app.utils.document_matching import fuzzy_matching, subsequence_matching
 
@@ -34,52 +38,49 @@ class FileDocumentRepository:
     with proper error handling and data validation.
     """
 
-    def __init__(self, database: AsyncIOMotorDatabase):
+    def __init__(self, database: Database):
         """Initialize file repository with database connection."""
         self.db = database
-        self.collection: AsyncIOMotorCollection = self.db.files
+        self.collection: Collection = self.db.documents
-        self._ensure_indexes()
 
-    async def initialize(self):
+    def initialize(self):
         """
         Initialize repository by ensuring required indexes exist.
 
         Should be called after repository instantiation to setup database indexes.
         """
-        await self._ensure_indexes()
+        self._ensure_indexes()
+        return self
 
-    async def _ensure_indexes(self):
+    def _ensure_indexes(self):
         """
         Ensure required database indexes exist.
 
         Creates unique index on username field to prevent duplicates.
         """
-        try:
-            await self.collection.create_index("filepath", unique=True)
-        except PyMongoError:
-            # Index might already exist, ignore error
-            pass
+        pass
 
-    async def create_document(self, file_data: FileDocument) -> FileDocument:
+    def create_document(self, file_data: FileDocument, session=None) -> FileDocument:
         """
         Create a new file document in database.
 
         Args:
             file_data (FileDocument): File document data to create
+            session (AsyncIOMotorClientSession, optional): MongoDB session
 
         Returns:
-            FileDocument: Created file document with database ID
+            FileDocument: Created document with database ID
 
         Raises:
             ValueError: If file creation fails due to validation
-            DuplicateKeyError: If file with same hash already exists
+            DuplicateKeyError: If a document with same hash already exists
         """
         try:
             file_dict = file_data.model_dump(by_alias=True, exclude_unset=True)
             if "_id" in file_dict and file_dict["_id"] is None:
                 del file_dict["_id"]
 
-            result = await self.collection.insert_one(file_dict)
+            result = self.collection.insert_one(file_dict, **get_extra_args(session))
             file_data.id = result.inserted_id
             return file_data
 
@@ -88,7 +89,7 @@ class FileDocumentRepository:
         except PyMongoError as e:
             raise ValueError(f"Failed to create file document: {e}")
 
-    async def find_document_by_id(self, file_id: str) -> Optional[FileDocument]:
+    def find_document_by_id(self, file_id: str) -> Optional[FileDocument]:
         """
         Find file document by ID.
 
@@ -102,7 +103,7 @@ class FileDocumentRepository:
             if not ObjectId.is_valid(file_id):
                 return None
 
-            file_doc = await self.collection.find_one({"_id": ObjectId(file_id)})
+            file_doc = self.collection.find_one({"_id": ObjectId(file_id)})
             if file_doc:
                 return FileDocument(**file_doc)
             return None
@@ -110,7 +111,7 @@ class FileDocumentRepository:
         except PyMongoError:
             return None
 
-    async def find_document_by_hash(self, file_hash: str) -> Optional[FileDocument]:
+    def find_document_by_hash(self, file_hash: str) -> Optional[FileDocument]:
         """
         Find file document by file hash to detect duplicates.
 
@@ -121,7 +122,7 @@ class FileDocumentRepository:
             FileDocument or None: File document if found, None otherwise
         """
         try:
-            file_doc = await self.collection.find_one({"file_hash": file_hash})
+            file_doc = self.collection.find_one({"file_hash": file_hash})
             if file_doc:
                 return FileDocument(**file_doc)
             return None
@@ -129,7 +130,48 @@ class FileDocumentRepository:
         except PyMongoError:
             return None
 
-    async def find_document_by_filepath(self, filepath: str) -> Optional[FileDocument]:
+    def find_document_with_pdf_hash(self, file_hash: str) -> Optional[FileDocument]:
+        """
+        Find file document by file hash with a pdf_file_hash set (not None).
+
+        Args:
+            file_hash (str): SHA256 hash of file content
+
+        Returns:
+            FileDocument or None: File document if found, None otherwise
+        """
+        try:
+            file_doc = self.collection.find_one({"file_hash": file_hash,
+                                                 "pdf_file_hash": {"$ne": None}})
+            if file_doc:
+                return FileDocument(**file_doc)
+            return None
+
+        except PyMongoError:
+            return None
+
+    def find_same_document(self, filename: str, file_hash: str):
+        """
+        Find document with the same filename and the same file hash.
+
+        Args:
+            filename (str): Filename to match
+            file_hash (str): SHA256 hash of file content
+
+        Returns:
+            FileDocument or None: File document if found, None otherwise
+        """
+        try:
+            file_doc = self.collection.find_one({"file_hash": file_hash,
+                                                 "filename": filename})
+            if file_doc:
+                return FileDocument(**file_doc)
+            return None
+
+        except PyMongoError:
+            return None
+
+    def find_document_by_filepath(self, filepath: str) -> Optional[FileDocument]:
         """
         Find file document by exact filepath.
 
@@ -140,7 +182,7 @@ class FileDocumentRepository:
             FileDocument or None: File document if found, None otherwise
         """
         try:
-            file_doc = await self.collection.find_one({"filepath": filepath})
+            file_doc = self.collection.find_one({"filepath": filepath})
             if file_doc:
                 return FileDocument(**file_doc)
             return None
@@ -148,7 +190,7 @@ class FileDocumentRepository:
         except PyMongoError:
             return None
 
-    async def find_document_by_name(self, filename: str, matching_method: MatchMethodBase = None) -> List[FileDocument]:
+    def find_document_by_name(self, filename: str, matching_method: MatchMethodBase = None) -> List[FileDocument]:
         """
         Find file documents by filename using fuzzy matching.
 
@@ -162,8 +204,7 @@ class FileDocumentRepository:
         try:
             # Get all files from database
             cursor = self.collection.find({})
-            all_files = await cursor.to_list(length=None)
-            all_documents = [FileDocument(**file_doc) for file_doc in all_files]
+            all_documents = [FileDocument(**file_doc) for file_doc in cursor]
 
             if isinstance(matching_method, FuzzyMatching):
                 return fuzzy_matching(filename, all_documents, matching_method.threshold)
@@ -173,7 +214,7 @@ class FileDocumentRepository:
         except PyMongoError:
             return []
 
-    async def list_documents(self, skip: int = 0, limit: int = 100) -> List[FileDocument]:
+    def list_documents(self, skip: int = 0, limit: int = 100) -> List[FileDocument]:
         """
         List file documents with pagination.
 
@@ -186,13 +227,12 @@ class FileDocumentRepository:
         """
         try:
             cursor = self.collection.find({}).skip(skip).limit(limit).sort("detected_at", -1)
-            file_docs = await cursor.to_list(length=limit)
-            return [FileDocument(**doc) for doc in file_docs]
+            return [FileDocument(**doc) for doc in cursor]
 
         except PyMongoError:
            return []
 
-    async def count_documents(self) -> int:
+    def count_documents(self) -> int:
         """
         Count total number of file documents.
 
@@ -200,17 +240,18 @@ class FileDocumentRepository:
             int: Total number of file documents in collection
         """
         try:
-            return await self.collection.count_documents({})
+            return self.collection.count_documents({})
         except PyMongoError:
             return 0
 
-    async def update_document(self, file_id: str, update_data: dict) -> Optional[FileDocument]:
+    def update_document(self, file_id: str, update_data: dict, session=None) -> Optional[FileDocument]:
         """
         Update file document with new data.
 
         Args:
             file_id (str): File document ID to update
             update_data (dict): Fields to update
+            session (AsyncIOMotorClientSession, optional): MongoDB session
 
         Returns:
             FileDocument or None: Updated file document if successful, None otherwise
@@ -223,12 +264,13 @@ class FileDocumentRepository:
             clean_update_data = {k: v for k, v in update_data.items() if v is not None}
 
             if not clean_update_data:
-                return await self.find_document_by_id(file_id)
+                return self.find_document_by_id(file_id)
 
-            result = await self.collection.find_one_and_update(
+            result = self.collection.find_one_and_update(
                 {"_id": ObjectId(file_id)},
                 {"$set": clean_update_data},
-                return_document=True
+                return_document=True,
+                **get_extra_args(session)
             )
 
             if result:
@@ -238,12 +280,13 @@ class FileDocumentRepository:
         except PyMongoError:
             return None
 
-    async def delete_document(self, file_id: str) -> bool:
+    def delete_document(self, file_id: str, session=None) -> bool:
         """
         Delete file document from database.
 
         Args:
             file_id (str): File document ID to delete
+            session (AsyncIOMotorClientSession, optional): MongoDB session
 
         Returns:
             bool: True if file was deleted, False otherwise
@@ -252,7 +295,7 @@ class FileDocumentRepository:
             if not ObjectId.is_valid(file_id):
                 return False
 
-            result = await self.collection.delete_one({"_id": ObjectId(file_id)})
+            result = self.collection.delete_one({"_id": ObjectId(file_id)}, **get_extra_args(session))
             return result.deleted_count > 0
 
         except PyMongoError:
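A minimal sketch of the new `session` parameter in use (the module path, connection URL, and FileDocument fields are assumptions; `create_document(..., session=...)` and `get_extra_args` come from the diff above):

from pymongo import MongoClient

from app.database.repositories.file_document_repository import FileDocumentRepository  # assumed path
from app.models.document import FileDocument

client = MongoClient("mongodb://localhost:27017")  # transactions require a replica set
repo = FileDocumentRepository(client["mydocmanager"]).initialize()

with client.start_session() as session:
    with session.start_transaction():
        # get_extra_args(session) expands to session=session inside insert_one,
        # so the insert joins the surrounding transaction.
        doc = repo.create_document(
            FileDocument(filename="report.pdf", file_hash="sha256-placeholder"),  # illustrative fields
            session=session,
        )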
230  src/file-processor/app/database/repositories/job_repository.py  (new file)
@@ -0,0 +1,230 @@
"""
Repository for managing processing jobs in MongoDB.

This module provides data access layer for ProcessingJob operations
with automatic timestamp management and error handling.
"""

from datetime import datetime
from typing import List, Optional

from pymongo.collection import Collection
from pymongo.database import Database
from pymongo.errors import PyMongoError

from app.exceptions.job_exceptions import JobRepositoryError
from app.models.job import ProcessingJob, ProcessingStatus
from app.models.types import PyObjectId


class JobRepository:
    """
    Repository for processing job data access operations.

    Provides CRUD operations for ProcessingJob documents with automatic
    timestamp management and proper error handling.
    """

    def __init__(self, database: Database):
        """Initialize repository with MongoDB collection reference."""
        self.db = database
        self.collection: Collection = self.db.processing_jobs

    def _ensure_indexes(self):
        """
        Ensure required database indexes exist.

        Creates unique index on document_id field to prevent duplicates.
        """
        try:
            self.collection.create_index("document_id", unique=True)
        except PyMongoError:
            # Index might already exist, ignore error
            pass

    def initialize(self):
        """
        Initialize repository by ensuring required indexes exist.

        Should be called after repository instantiation to setup database indexes.
        """
        self._ensure_indexes()
        return self

    def create_job(self, document_id: PyObjectId, task_id: Optional[str] = None) -> ProcessingJob:
        """
        Create a new processing job.

        Args:
            document_id: Reference to the file document
            task_id: Optional Celery task UUID

        Returns:
            The created ProcessingJob

        Raises:
            JobRepositoryError: If database operation fails
        """
        try:
            job_data = {
                "document_id": document_id,
                "status": ProcessingStatus.PENDING,
                "task_id": task_id,
                "created_at": datetime.now(),
                "started_at": None,
                "completed_at": None,
                "error_message": None
            }

            result = self.collection.insert_one(job_data)
            job_data["_id"] = result.inserted_id

            return ProcessingJob(**job_data)

        except PyMongoError as e:
            raise JobRepositoryError("create_job", e)

    def find_job_by_id(self, job_id: PyObjectId) -> Optional[ProcessingJob]:
        """
        Retrieve a job by its ID.

        Args:
            job_id: The job ObjectId

        Returns:
            The ProcessingJob document, or None if the job doesn't exist

        Raises:
            JobRepositoryError: If database operation fails
        """
        try:
            job_data = self.collection.find_one({"_id": job_id})
            if job_data:
                return ProcessingJob(**job_data)

            return None

        except PyMongoError as e:
            raise JobRepositoryError("get_job_by_id", e)

    def update_job_status(
        self,
        job_id: PyObjectId,
        status: ProcessingStatus,
        error_message: Optional[str] = None
    ) -> Optional[ProcessingJob]:
        """
        Update job status with automatic timestamp management.

        Args:
            job_id: The job ObjectId
            status: New processing status
            error_message: Optional error message for failed jobs

        Returns:
            The updated ProcessingJob, or None if the job doesn't exist

        Raises:
            JobRepositoryError: If database operation fails
        """
        try:
            # Prepare update data
            update_data = {"status": status}

            # Set appropriate timestamp based on status
            current_time = datetime.now()
            if status == ProcessingStatus.PROCESSING:
                update_data["started_at"] = current_time
            elif status in (ProcessingStatus.COMPLETED, ProcessingStatus.FAILED):
                update_data["completed_at"] = current_time

            # Add error message if provided
            if error_message is not None:
                update_data["error_message"] = error_message

            result = self.collection.find_one_and_update(
                {"_id": job_id},
                {"$set": update_data},
                return_document=True
            )

            if result:
                return ProcessingJob(**result)

            return None

        except PyMongoError as e:
            raise JobRepositoryError("update_job_status", e)

    def delete_job(self, job_id: PyObjectId) -> bool:
        """
        Delete a job from the database.

        Args:
            job_id: The job ObjectId

        Returns:
            True if job was deleted, False if not found

        Raises:
            JobRepositoryError: If database operation fails
        """
        try:
            result = self.collection.delete_one({"_id": job_id})

            return result.deleted_count > 0

        except PyMongoError as e:
            raise JobRepositoryError("delete_job", e)

    def find_jobs_by_document_id(self, document_id: PyObjectId) -> List[ProcessingJob]:
        """
        Retrieve all jobs for a specific file.

        Args:
            document_id: The file ObjectId

        Returns:
            List of ProcessingJob documents

        Raises:
            JobRepositoryError: If database operation fails
        """
        try:
            cursor = self.collection.find({"document_id": document_id})

            jobs = []
            for job_data in cursor:
                jobs.append(ProcessingJob(**job_data))

            return jobs

        except PyMongoError as e:
            raise JobRepositoryError("get_jobs_by_file_id", e)

    def get_jobs_by_status(self, status: ProcessingStatus) -> List[ProcessingJob]:
        """
        Retrieve all jobs with a specific status.

        Args:
            status: The processing status to filter by

        Returns:
            List of ProcessingJob documents

        Raises:
            JobRepositoryError: If database operation fails
        """
        try:
            cursor = self.collection.find({"status": status})

            jobs = []
            for job_data in cursor:
                jobs.append(ProcessingJob(**job_data))

            return jobs

        except PyMongoError as e:
            raise JobRepositoryError("get_jobs_by_status", e)
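A short sketch of the intended job lifecycle through this repository (the database bootstrap and document id are illustrative; the class, module path, and ProcessingStatus members come from the file above):

from bson import ObjectId
from pymongo import MongoClient

from app.database.repositories.job_repository import JobRepository
from app.models.job import ProcessingStatus

db = MongoClient("mongodb://localhost:27017")["mydocmanager"]  # assumed URL
jobs = JobRepository(db).initialize()

doc_id = ObjectId()  # stands in for an existing FileDocument _id
job = jobs.create_job(document_id=doc_id, task_id="celery-task-uuid")

# PROCESSING stamps started_at; COMPLETED and FAILED stamp completed_at.
# job.id assumes the Pydantic model aliases _id to id, as FileDocument does.
jobs.update_job_status(job.id, ProcessingStatus.PROCESSING)
jobs.update_job_status(job.id, ProcessingStatus.COMPLETED)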
@@ -5,10 +5,12 @@ This module implements the repository pattern for user CRUD operations
 with dependency injection of the database connection using async/await.
 """
 
-from typing import Optional, List
 from datetime import datetime
+from typing import Optional, List
 
 from bson import ObjectId
-from motor.motor_asyncio import AsyncIOMotorDatabase, AsyncIOMotorCollection
+from pymongo.collection import Collection
+from pymongo.database import Database
 from pymongo.errors import DuplicateKeyError, PyMongoError
 
 from app.models.user import UserCreate, UserInDB, UserUpdate
@@ -23,7 +25,7 @@ class UserRepository:
     following the repository pattern with dependency injection and async/await.
     """
 
-    def __init__(self, database: AsyncIOMotorDatabase):
+    def __init__(self, database: Database):
         """
         Initialize repository with database dependency.
 
@@ -31,30 +33,30 @@ class UserRepository:
             database (AsyncIOMotorDatabase): MongoDB database instance
         """
         self.db = database
-        self.collection: AsyncIOMotorCollection = database.users
+        self.collection: Collection = database.users
-        self._ensure_indexes()
 
-    async def initialize(self):
+    def initialize(self):
         """
         Initialize repository by ensuring required indexes exist.
 
         Should be called after repository instantiation to setup database indexes.
         """
-        await self._ensure_indexes()
+        self._ensure_indexes()
+        return self
 
-    async def _ensure_indexes(self):
+    def _ensure_indexes(self):
         """
         Ensure required database indexes exist.
 
         Creates unique index on username field to prevent duplicates.
         """
        try:
-            await self.collection.create_index("username", unique=True)
+            self.collection.create_index("username", unique=True)
         except PyMongoError:
             # Index might already exist, ignore error
             pass
 
-    async def create_user(self, user_data: UserCreate) -> UserInDB:
+    def create_user(self, user_data: UserCreate) -> UserInDB:
         """
         Create a new user in the database.
 
@@ -79,7 +81,7 @@ class UserRepository:
         }
 
         try:
-            result = await self.collection.insert_one(user_dict)
+            result = self.collection.insert_one(user_dict)
             user_dict["_id"] = result.inserted_id
             return UserInDB(**user_dict)
         except DuplicateKeyError as e:
@@ -87,7 +89,7 @@ class UserRepository:
         except PyMongoError as e:
             raise ValueError(f"Failed to create user: {e}")
 
-    async def find_user_by_username(self, username: str) -> Optional[UserInDB]:
+    def find_user_by_username(self, username: str) -> Optional[UserInDB]:
         """
         Find user by username.
 
@@ -98,14 +100,14 @@ class UserRepository:
             UserInDB or None: User if found, None otherwise
         """
         try:
-            user_doc = await self.collection.find_one({"username": username})
+            user_doc = self.collection.find_one({"username": username})
             if user_doc:
                 return UserInDB(**user_doc)
             return None
         except PyMongoError:
             return None
 
-    async def find_user_by_id(self, user_id: str) -> Optional[UserInDB]:
+    def find_user_by_id(self, user_id: str) -> Optional[UserInDB]:
         """
         Find user by ID.
 
@@ -119,14 +121,14 @@ class UserRepository:
             if not ObjectId.is_valid(user_id):
                 return None
 
-            user_doc = await self.collection.find_one({"_id": ObjectId(user_id)})
+            user_doc = self.collection.find_one({"_id": ObjectId(user_id)})
             if user_doc:
                 return UserInDB(**user_doc)
             return None
         except PyMongoError:
             return None
 
-    async def find_user_by_email(self, email: str) -> Optional[UserInDB]:
+    def find_user_by_email(self, email: str) -> Optional[UserInDB]:
         """
         Find user by email address.
 
@@ -137,14 +139,14 @@ class UserRepository:
             UserInDB or None: User if found, None otherwise
         """
         try:
-            user_doc = await self.collection.find_one({"email": email})
+            user_doc = self.collection.find_one({"email": email})
             if user_doc:
                 return UserInDB(**user_doc)
             return None
         except PyMongoError:
             return None
 
-    async def update_user(self, user_id: str, user_update: UserUpdate) -> Optional[UserInDB]:
+    def update_user(self, user_id: str, user_update: UserUpdate) -> Optional[UserInDB]:
         """
         Update user information.
 
@@ -172,14 +174,16 @@ class UserRepository:
                 update_data["role"] = user_update.role
             if user_update.is_active is not None:
                 update_data["is_active"] = user_update.is_active
+            if user_update.preferences is not None:
+                update_data["preferences"] = user_update.preferences
 
             # Remove None values from update data
             clean_update_data = {k: v for k, v in update_data.items() if v is not None}
 
             if not clean_update_data:
-                return await self.find_user_by_id(user_id)
+                return self.find_user_by_id(user_id)
 
-            result = await self.collection.find_one_and_update(
+            result = self.collection.find_one_and_update(
                 {"_id": ObjectId(user_id)},
                 {"$set": clean_update_data},
                 return_document=True
@@ -192,7 +196,7 @@ class UserRepository:
         except PyMongoError:
             return None
 
-    async def delete_user(self, user_id: str) -> bool:
+    def delete_user(self, user_id: str) -> bool:
         """
         Delete user from database.
 
@@ -206,12 +210,12 @@ class UserRepository:
             if not ObjectId.is_valid(user_id):
                 return False
 
-            result = await self.collection.delete_one({"_id": ObjectId(user_id)})
+            result = self.collection.delete_one({"_id": ObjectId(user_id)})
             return result.deleted_count > 0
         except PyMongoError:
             return False
 
-    async def list_users(self, skip: int = 0, limit: int = 100) -> List[UserInDB]:
+    def list_users(self, skip: int = 0, limit: int = 100) -> List[UserInDB]:
         """
         List users with pagination.
 
@@ -224,12 +228,12 @@ class UserRepository:
         """
         try:
             cursor = self.collection.find({}).skip(skip).limit(limit).sort("created_at", -1)
-            user_docs = await cursor.to_list(length=limit)
+            user_docs = cursor.to_list(length=limit)
             return [UserInDB(**user_doc) for user_doc in user_docs]
         except PyMongoError:
             return []
 
-    async def count_users(self) -> int:
+    def count_users(self) -> int:
         """
         Count total number of users.
 
@@ -237,11 +241,11 @@ class UserRepository:
             int: Total number of users in database
         """
         try:
-            return await self.collection.count_documents({})
+            return self.collection.count_documents({})
         except PyMongoError:
             return 0
 
-    async def user_exists(self, username: str) -> bool:
+    def user_exists(self, username: str) -> bool:
         """
         Check if user exists by username.
 
@@ -252,7 +256,7 @@ class UserRepository:
             bool: True if user exists, False otherwise
         """
         try:
-            count = await self.collection.count_documents({"username": username})
+            count = self.collection.count_documents({"username": username})
             return count > 0
         except PyMongoError:
             return False
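A small sketch of the newly handled preferences field in update_user (the shape of the preferences payload is an assumption; only the field name comes from the diff above):

from app.models.user import UserUpdate

# user_repository is assumed to be an initialized UserRepository;
# the {"theme": "dark"} payload is illustrative.
update = UserUpdate(preferences={"theme": "dark"})
updated = user_repository.update_user(user_id, update)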
0   src/file-processor/app/exceptions/__init__.py  (new file)
38  src/file-processor/app/exceptions/job_exceptions.py  (new file)
@@ -0,0 +1,38 @@
"""
Custom exceptions for job management operations.

This module defines specific exceptions for job processing lifecycle
and repository operations to provide clear error handling.
"""

from app.models.job import ProcessingStatus


class InvalidStatusTransitionError(Exception):
    """
    Raised when an invalid status transition is attempted.

    This exception indicates that an attempt was made to change a job's
    status to an invalid target status given the current status.
    """

    def __init__(self, current_status: ProcessingStatus, target_status: ProcessingStatus):
        self.current_status = current_status
        self.target_status = target_status
        super().__init__(
            f"Invalid status transition from '{current_status}' to '{target_status}'"
        )


class JobRepositoryError(Exception):
    """
    Raised when a MongoDB operation fails in the job repository.

    This exception wraps database-related errors that occur during
    job repository operations.
    """

    def __init__(self, operation: str, original_error: Exception):
        self.operation = operation
        self.original_error = original_error
        super().__init__(f"Repository operation '{operation}' failed: {str(original_error)}")
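A brief sketch of handling these exceptions at a call site (the service-layer context is an assumption; the exception attributes come from the class above):

from app.database.repositories.job_repository import JobRepository
from app.exceptions.job_exceptions import JobRepositoryError


def safe_delete(jobs: JobRepository, job_id) -> bool:
    try:
        return jobs.delete_job(job_id)
    except JobRepositoryError as e:
        # e.operation and e.original_error identify the failing call.
        print(f"Job cleanup failed during '{e.operation}': {e.original_error}")
        return False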
242
src/file-processor/app/file_watcher.py
Normal file
242
src/file-processor/app/file_watcher.py
Normal file
@@ -0,0 +1,242 @@
+"""
+File watcher implementation with Watchdog observer and ProcessingJob management.
+
+This module provides real-time file monitoring for document processing.
+When a file is created in the watched directory, it:
+1. Creates a document record via DocumentService
+2. Dispatches a Celery task for processing
+3. Creates a ProcessingJob to track the task lifecycle
+"""
+
+import logging
+import threading
+from pathlib import Path
+from typing import Optional
+
+from watchdog.events import FileSystemEventHandler, FileCreatedEvent
+from watchdog.observers import Observer
+
+from app.services.document_service import DocumentService
+from app.services.job_service import JobService
+
+logger = logging.getLogger(__name__)
+
+
+class DocumentFileEventHandler(FileSystemEventHandler):
+    """
+    Event handler for document file creation events.
+
+    Processes newly created files by creating document records,
+    dispatching Celery tasks, and managing processing jobs.
+    """
+
+    SUPPORTED_EXTENSIONS = {'.txt', '.pdf', '.docx', '.jpg', '.png', '.jpeg'}
+
+    def __init__(self, document_service: DocumentService, job_service: JobService):
+        """
+        Initialize the event handler.
+
+        Args:
+            document_service: Service for document management
+            job_service: Service for processing job management
+        """
+        super().__init__()
+        self.document_service = document_service
+        self.job_service = job_service
+
+    def on_created(self, event: FileCreatedEvent) -> None:
+        """
+        Handle file creation events.
+
+        Args:
+            event: File system event containing file path information
+        """
+        if event.is_directory:
+            return
+
+        filepath = event.src_path
+        file_extension = Path(filepath).suffix.lower()
+
+        if file_extension not in self.SUPPORTED_EXTENSIONS:
+            logger.info(f"Ignoring unsupported file type: {filepath}")
+            self.document_service.move_to_ignored(filepath, "unsupported file type")
+            return
+
+        logger.info(f"Processing new file: {filepath}")
+
+        try:
+            from tasks.document_processing import process_document
+            task_result = process_document.delay(filepath)
+            task_id = task_result.task_id
+            logger.info(f"Dispatched Celery task with ID: {task_id}")
+
+        except Exception as e:
+            logger.error(f"Failed to process file {filepath}: {str(e)}")
+            # Note: We don't re-raise the exception to keep the watcher running
+
+
+class FileWatcher:
+    """
+    File system watcher for automatic document processing.
+
+    Monitors a directory for new files and triggers processing pipeline
+    using a dedicated observer thread.
+    """
+
+    def __init__(
+        self,
+        watch_directory: str,
+        document_service: DocumentService,
+        job_service: JobService,
+        recursive: bool = True
+    ):
+        """
+        Initialize the file watcher.
+
+        Args:
+            watch_directory: Directory path to monitor
+            document_service: Service for document management
+            job_service: Service for processing job management
+            recursive: Whether to watch subdirectories recursively
+        """
+        self.watch_directory = Path(watch_directory)
+        self.recursive = recursive
+        self.observer: Optional[Observer] = None
+        self._observer_thread: Optional[threading.Thread] = None
+        self._stop_event = threading.Event()
+
+        # Validate watch directory
+        if not self.watch_directory.exists():
+            raise ValueError(f"Watch directory does not exist: {watch_directory}")
+
+        if not self.watch_directory.is_dir():
+            raise ValueError(f"Watch path is not a directory: {watch_directory}")
+
+        # Create event handler
+        self.event_handler = DocumentFileEventHandler(
+            document_service=document_service,
+            job_service=job_service
+        )
+
+        logger.info(f"FileWatcher initialized for directory: {self.watch_directory}")
+
+    def start(self) -> None:
+        """
+        Start the file watcher in a separate thread.
+
+        Raises:
+            RuntimeError: If the watcher is already running
+        """
+        if self.is_running():
+            raise RuntimeError("FileWatcher is already running")
+
+        self.observer = Observer()
+        self.observer.schedule(
+            self.event_handler,
+            str(self.watch_directory),
+            recursive=self.recursive
+        )
+
+        # Start observer in separate thread
+        self._observer_thread = threading.Thread(
+            target=self._run_observer,
+            name="FileWatcher-Observer"
+        )
+        self._stop_event.clear()
+        self._observer_thread.start()
+
+        logger.info("FileWatcher started successfully")
+
+    def stop(self, timeout: float = 5.0) -> None:
+        """
+        Stop the file watcher gracefully.
+
+        Args:
+            timeout: Maximum time to wait for graceful shutdown
+        """
+        if not self.is_running():
+            logger.warning("FileWatcher is not running")
+            return
+
+        logger.info("Stopping FileWatcher...")
+
+        # Signal stop and wait for observer thread
+        self._stop_event.set()
+
+        if self.observer:
+            self.observer.stop()
+
+        if self._observer_thread and self._observer_thread.is_alive():
+            self._observer_thread.join(timeout=timeout)
+
+            if self._observer_thread.is_alive():
+                logger.warning("FileWatcher thread did not stop gracefully within timeout")
+            else:
+                logger.info("FileWatcher stopped gracefully")
+
+        # Clean up
+        self.observer = None
+        self._observer_thread = None
+
+    def is_running(self) -> bool:
+        """
+        Check if the file watcher is currently running.
+
+        Returns:
+            True if the watcher is running, False otherwise
+        """
+        return (
+            self.observer is not None
+            and self._observer_thread is not None
+            and self._observer_thread.is_alive()
+        )
+
+    def _run_observer(self) -> None:
+        """
+        Internal method to run the observer in a separate thread.
+
+        This method should not be called directly.
+        """
+        if not self.observer:
+            logger.error("Observer not initialized")
+            return
+
+        try:
+            self.observer.start()
+            logger.info("Observer thread started")
+
+            # Keep the observer running until stop is requested
+            while not self._stop_event.is_set():
+                self._stop_event.wait(timeout=1.0)
+
+            logger.info("Observer thread stopping...")
+
+        except Exception as e:
+            logger.error(f"Observer thread error: {str(e)}")
+        finally:
+            if self.observer:
+                self.observer.join()
+                logger.info("Observer thread stopped")
+
+
+def create_file_watcher(
+    watch_directory: str,
+    document_service: DocumentService,
+    job_service: JobService
+) -> FileWatcher:
+    """
+    Factory function to create a FileWatcher instance.
+
+    Args:
+        watch_directory: Directory path to monitor
+        document_service: Service for document management
+        job_service: Service for processing job management
+
+    Returns:
+        Configured FileWatcher instance
+    """
+    return FileWatcher(
+        watch_directory=watch_directory,
+        document_service=document_service,
+        job_service=job_service
+    )
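Taken together, the module is meant to be used as a start/stop pair around a long-running process. A minimal usage sketch, assuming `DocumentService` and `JobService` instances have already been constructed (their actual wiring is shown in the main.py diff below); the watch path is an illustrative placeholder:

```python
# Minimal standalone usage sketch; service construction is elided.
import time

from app.file_watcher import create_file_watcher

# document_service / job_service are assumed to exist; see the main.py diff below
watcher = create_file_watcher(
    watch_directory="/data/watch",   # assumed path, for illustration only
    document_service=document_service,
    job_service=job_service,
)
watcher.start()                      # spawns the "FileWatcher-Observer" thread
try:
    while True:
        time.sleep(1)                # the observer thread does the actual work
except KeyboardInterrupt:
    watcher.stop(timeout=5.0)        # graceful shutdown, warns if the timeout is exceeded
```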
@@ -1,203 +1,170 @@
 """
-FastAPI application for MyDocManager file processor service.
+FastAPI application with integrated FileWatcher for document processing.

-This service provides API endpoints for health checks and task dispatching.
+This module provides the main FastAPI application with:
+- JWT authentication
+- User management APIs
+- Real-time file monitoring via FileWatcher
+- Document processing via Celery tasks
 """

 import logging
-import os
 from contextlib import asynccontextmanager
-from fastapi import FastAPI, HTTPException, Depends
-from pydantic import BaseModel
-import redis
-from celery import Celery
+from typing import AsyncGenerator

-from app.database.connection import test_database_connection, get_database
-from app.database.repositories.user_repository import UserRepository
-from app.models.user import UserCreate
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+
+from app.api.routes.auth import router as auth_router
+from app.api.routes.users import router as users_router
+from app.api.routes.document import router as documents_router
+from app.config import settings
+from app.database.connection import get_database
+from app.file_watcher import create_file_watcher, FileWatcher
+from app.services.document_service import DocumentService
 from app.services.init_service import InitializationService
+from app.services.job_service import JobService
 from app.services.user_service import UserService

 # Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)

+# Global file watcher instance
+file_watcher: FileWatcher = None
+

 @asynccontextmanager
-async def lifespan(app: FastAPI):
+async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
     """
-    Application lifespan manager for startup and shutdown tasks.
+    FastAPI lifespan context manager.

-    Handles initialization tasks that need to run when the application starts,
-    including admin user creation and other setup procedures.
+    Handles application startup and shutdown events including:
+    - Database connection
+    - Default admin user creation
+    - FileWatcher startup/shutdown
     """
-    # Startup tasks
+    global file_watcher
+
+    # Startup
     logger.info("Starting MyDocManager application...")

     try:
         # Initialize database connection
         database = get_database()
+        logger.info("Database connection established")

-        # Initialize repositories and services
-        user_repository = UserRepository(database)
-        user_service = UserService(user_repository)
+        document_service = DocumentService(database=database, objects_folder=settings.get_objects_folder())
+        job_service = JobService(database=database)
+        user_service = UserService(database=database)
+        logger.info("Service created")

+        # Create default admin user
         init_service = InitializationService(user_service)
+        init_service.initialize_application()
+        logger.info("Default admin user initialization completed")

-        # Run initialization tasks
-        initialization_result = init_service.initialize_application()
+        # Create and start file watcher
+        file_watcher = create_file_watcher(
+            watch_directory=settings.get_watch_folder(),
+            document_service=document_service,
+            job_service=job_service
+        )
+        file_watcher.start()
+        logger.info(f"FileWatcher started for directory: {settings.get_watch_folder()}")

-        if initialization_result["initialization_success"]:
-            logger.info("Application startup completed successfully")
-            if initialization_result["admin_user_created"]:
-                logger.info("Default admin user was created during startup")
-        else:
-            logger.error("Application startup completed with errors:")
-            for error in initialization_result["errors"]:
-                logger.error(f"  - {error}")
+        logger.info("Application startup completed successfully")
+
+        yield

     except Exception as e:
-        logger.error(f"Critical error during application startup: {str(e)}")
-        # You might want to decide if the app should continue or exit here
-        # For now, we log the error but continue
-
-    yield  # Application is running
-
-    # Shutdown tasks (if needed)
-    logger.info("Shutting down MyDocManager application...")
+        logger.error(f"Application startup failed: {str(e)}")
+        raise
+    finally:
+        # Shutdown
+        logger.info("Shutting down MyDocManager application...")
+
+        if file_watcher and file_watcher.is_running():
+            file_watcher.stop()
+            logger.info("FileWatcher stopped")
+
+        logger.info("Application shutdown completed")


-# Initialize FastAPI app
+# Create FastAPI application
 app = FastAPI(
-    title="MyDocManager File Processor",
-    description="File processing and task dispatch service",
-    version="1.0.0",
+    title="MyDocManager",
+    description="Real-time document processing application with authentication",
+    version="0.1.0",
     lifespan=lifespan
 )

-# Environment variables
-REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/0")
-MONGODB_URL = os.getenv("MONGODB_URL", "mongodb://localhost:27017")
-
-# Initialize Redis client
-try:
-    redis_client = redis.from_url(REDIS_URL)
-except Exception as e:
-    redis_client = None
-    print(f"Warning: Could not connect to Redis: {e}")
-
-# Initialize Celery
-celery_app = Celery(
-    "file_processor",
-    broker=REDIS_URL,
-    backend=REDIS_URL
-)
-
-
-# Pydantic models
-class TestTaskRequest(BaseModel):
-    """Request model for test task."""
-    message: str
-
-
-def get_user_service() -> UserService:
-    """
-    Dependency to get user service instance.
-
-    This should be properly implemented with database connection management
-    in your actual application.
-    """
-    database = get_database()
-    user_repository = UserRepository(database)
-    return UserService(user_repository)
-
-
-# Your API routes would use the service like this:
-@app.post("/api/users")
-async def create_user(
-    user_data: UserCreate,
-    user_service: UserService = Depends(get_user_service)
-):
-    return user_service.create_user(user_data)
+# Configure CORS
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["http://localhost:5173", "http://localhost:5174"],  # React frontend
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Include routers
+app.include_router(auth_router, prefix="/auth", tags=["Authentication"])
+app.include_router(users_router, prefix="/users", tags=["User Management"])
+app.include_router(documents_router, prefix="/api", tags=["Documents"])
+# app.include_router(jobs_router, prefix="/jobs", tags=["Processing Jobs"])


 @app.get("/health")
 async def health_check():
     """
     Health check endpoint.

     Returns:
-        dict: Service health status with dependencies
+        Dictionary containing application health status
     """
-    health_status = {
+    return {
         "status": "healthy",
-        "service": "file-processor",
-        "dependencies": {
-            "redis": "unknown",
-            "mongodb": "unknown"
-        },
+        "service": "MyDocManager",
+        "version": "1.0.0",
+        "file_watcher_running": file_watcher.is_running() if file_watcher else False
     }

-    # Check Redis connection
-    if redis_client:
-        try:
-            redis_client.ping()
-            health_status["dependencies"]["redis"] = "connected"
-        except Exception:
-            health_status["dependencies"]["redis"] = "disconnected"
-            health_status["status"] = "degraded"
-
-    # check MongoDB connection
-    if test_database_connection():
-        health_status["dependencies"]["mongodb"] = "connected"
-    else:
-        health_status["dependencies"]["mongodb"] = "disconnected"
-
-    return health_status
-
-
-@app.post("/test-task")
-async def dispatch_test_task(request: TestTaskRequest):
-    """
-    Dispatch a test task to Celery worker.
-
-    Args:
-        request: Test task request containing message
-
-    Returns:
-        dict: Task dispatch information
-
-    Raises:
-        HTTPException: If task dispatch fails
-    """
-    try:
-        # Send task to worker
-        task = celery_app.send_task(
-            "main.test_task",
-            args=[request.message]
-        )
-
-        return {
-            "status": "dispatched",
-            "task_id": task.id,
-            "message": f"Test task dispatched with message: {request.message}"
-        }
-
-    except Exception as e:
-        raise HTTPException(
-            status_code=500,
-            detail=f"Failed to dispatch task: {str(e)}"
-        )


 @app.get("/")
 async def root():
     """
-    Root endpoint.
+    Root endpoint with basic application information.

     Returns:
-        dict: Basic service information
+        Dictionary containing welcome message and available endpoints
     """
     return {
-        "service": "MyDocManager File Processor",
-        "version": "1.0.0",
-        "status": "running"
+        "message": "Welcome to MyDocManager",
+        "description": "Real-time document processing application",
+        "docs": "/docs",
+        "health": "/health"
+    }
+
+
+@app.get("/watcher/status")
+async def watcher_status():
+    """
+    Get file watcher status.
+
+    Returns:
+        Dictionary containing file watcher status information
+    """
+    if not file_watcher:
+        return {
+            "status": "not_initialized",
+            "running": False
+        }
+
+    return {
+        "status": "initialized",
+        "running": file_watcher.is_running(),
+        "watch_directory": str(file_watcher.watch_directory),
+        "recursive": file_watcher.recursive
     }
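A quick smoke test of the endpoints this rewrite exposes. This sketch assumes the app is served on localhost:8000 and that the `httpx` client library is available; neither assumption comes from the diff itself:

```python
# Smoke test sketch; host/port are assumptions, httpx is an external client library.
import httpx

resp = httpx.get("http://localhost:8000/health")
print(resp.json())   # e.g. {"status": "healthy", ..., "file_watcher_running": True}

resp = httpx.get("http://localhost:8000/watcher/status")
print(resp.json())   # e.g. {"status": "initialized", "running": True, ...}
```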
@@ -3,12 +3,45 @@ Authentication models and enums for user management.

 Contains user roles enumeration and authentication-related Pydantic models.
 """
+from datetime import datetime
 from enum import Enum
+
+from pydantic import BaseModel, Field
+
+from app.models.types import PyObjectId


 class UserRole(str, Enum):
     """User roles enumeration with string values."""

     USER = "user"
     ADMIN = "admin"
+
+
+class UserResponse(BaseModel):
+    """Model for user data in API responses (excludes password_hash)."""
+
+    id: PyObjectId = Field(alias="_id")
+    username: str
+    email: str
+    role: UserRole
+    is_active: bool
+    created_at: datetime
+    updated_at: datetime
+
+    model_config = {
+        "populate_by_name": True,
+        "arbitrary_types_allowed": True,
+    }
+
+
+class LoginResponse(BaseModel):
+    """Response model for successful login."""
+    access_token: str
+    token_type: str = "bearer"
+    user: UserResponse
+
+
+class MessageResponse(BaseModel):
+    """Generic message response."""
+    message: str
@@ -7,10 +7,9 @@ stored in MongoDB collections.

 from datetime import datetime
 from enum import Enum
-from typing import Any, Dict, Optional
+from typing import Any, Dict, List, Optional

-from bson import ObjectId
-from pydantic import BaseModel, Field, field_validator
+from pydantic import BaseModel, Field, field_validator, ConfigDict

 from app.models.types import PyObjectId
@@ -33,15 +32,6 @@ class ExtractionMethod(str, Enum):
     HYBRID = "hybrid"


-class ProcessingStatus(str, Enum):
-    """Status values for processing jobs."""
-
-    PENDING = "pending"
-    PROCESSING = "processing"
-    COMPLETED = "completed"
-    FAILED = "failed"
-
-
 class FileDocument(BaseModel):
     """
     Model for file documents stored in the 'files' collection.
@@ -58,6 +48,11 @@ class FileDocument(BaseModel):
     metadata: Dict[str, Any] = Field(default_factory=dict, description="File-specific metadata")
     detected_at: Optional[datetime] = Field(default=None, description="Timestamp when file was detected")
     file_hash: Optional[str] = Field(default=None, description="SHA256 hash of file content")
+    pdf_file_hash: Optional[str] = Field(default=None, description="SHA256 hash of the associated pdf file content")
+    thumbnail_file_hash: Optional[str] = Field(default=None, description="SHA256 hash of the thumbnail")
+    encoding: str = Field(default="utf-8", description="Character encoding for text files")
+    file_size: int = Field(..., ge=0, description="File size in bytes")
+    mime_type: str = Field(..., description="MIME type detected")

     @field_validator('filepath')
     @classmethod
@@ -74,69 +69,28 @@ class FileDocument(BaseModel):
         if not v.strip():
             raise ValueError("Filename cannot be empty")
         return v.strip()

-    class Config:
-        """Pydantic configuration."""
-        populate_by_name = True
-        arbitrary_types_allowed = True
-        json_encoders = {ObjectId: str}
-
-
-class DocumentContent(BaseModel):
-    """Model for document content."""
-
-    id: Optional[PyObjectId] = Field(default=None, alias="_id")
-    file_hash: Optional[str] = Field(default=None, description="SHA256 hash of file content")
-    content: str = Field(..., description="File content")
-    encoding: str = Field(default="utf-8", description="Character encoding for text files")
-    file_size: int = Field(..., ge=0, description="File size in bytes")
-    mime_type: str = Field(..., description="MIME type detected")
-
-
-class ProcessingJob(BaseModel):
+class DocumentResponse(BaseModel):
     """
-    Model for processing jobs stored in the 'processing_jobs' collection.
+    Response model for document API endpoints.

-    Tracks the lifecycle and status of document processing tasks.
+    Represents a document in the format expected by the frontend application.
+    Field names are automatically converted from snake_case to camelCase.
     """

-    id: Optional[PyObjectId] = Field(default=None, alias="_id")
-    file_id: PyObjectId = Field(..., description="Reference to file document")
-    status: ProcessingStatus = Field(
-        default=ProcessingStatus.PENDING,
-        description="Current processing status"
-    )
-    task_id: Optional[str] = Field(
-        default=None,
-        description="Celery task UUID"
-    )
-    created_at: Optional[datetime] = Field(
-        default=None,
-        description="Timestamp when job was created"
-    )
-    started_at: Optional[datetime] = Field(
-        default=None,
-        description="Timestamp when processing started"
-    )
-    completed_at: Optional[datetime] = Field(
-        default=None,
-        description="Timestamp when processing completed"
-    )
-    error_message: Optional[str] = Field(
-        default=None,
-        description="Error message if processing failed"
-    )
-
-    @field_validator('error_message')
-    @classmethod
-    def validate_error_message(cls, v: Optional[str]) -> Optional[str]:
-        """Clean up error message."""
-        if v is not None:
-            return v.strip() if v.strip() else None
-        return v
-
-    class Config:
-        """Pydantic configuration."""
-        populate_by_name = True
-        arbitrary_types_allowed = True
-        json_encoders = {ObjectId: str}
+    model_config = ConfigDict(alias_generator=lambda field_name: ''.join(
+        word.capitalize() if i > 0 else word
+        for i, word in enumerate(field_name.split('_'))
+    ), populate_by_name=True)
+
+    id: str = Field(..., description="Document unique identifier")
+    name: str = Field(..., description="Document filename")
+    original_file_type: str = Field(..., description="Original file type before conversion")
+    created_at: str = Field(..., description="ISO timestamp when document was created")
+    file_size: int = Field(..., description="File size in bytes")
+    page_count: int = Field(..., description="Number of pages in the document")
+    thumbnail_url: Optional[str] = Field(default=None, description="URL to document thumbnail")
+    pdf_url: Optional[str] = Field(default=None, description="URL to PDF version of document")
+    tags: List[str] = Field(default_factory=list, description="Document tags")
+    categories: List[str] = Field(default_factory=list, description="Document categories")
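The lambda alias generator above turns snake_case field names into camelCase keys on serialization. A small sketch of the effect, assuming Pydantic v2 and that responses are built with `model_dump(by_alias=True)`; the field values are illustrative:

```python
# Sketch: the alias generator yields camelCase keys when dumping by alias.
doc = DocumentResponse(
    id="66f0a1b2c3d4e5f6a7b8c9d0",      # illustrative values throughout
    name="report.docx",
    original_file_type="docx",
    created_at="2024-01-01T12:00:00Z",
    file_size=52431,
    page_count=3,
)
print(doc.model_dump(by_alias=True))
# {'id': ..., 'name': 'report.docx', 'originalFileType': 'docx',
#  'createdAt': '2024-01-01T12:00:00Z', 'fileSize': 52431, 'pageCount': 3,
#  'thumbnailUrl': None, 'pdfUrl': None, 'tags': [], 'categories': []}
```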
@@ -0,0 +1,45 @@
+from datetime import datetime
+from enum import Enum
+from typing import Optional
+
+from bson import ObjectId
+from pydantic import BaseModel, Field, field_validator
+
+from app.models.types import PyObjectId
+
+
+class ProcessingStatus(str, Enum):
+    """Status values for processing jobs."""
+
+    PENDING = "pending"
+    PROCESSING = "processing"
+    COMPLETED = "completed"
+    SAVING_OBJECT = "saving_object"
+    SAVING_PDF = "saving_pdf"
+    CREATING_THUMBNAIL = "creating_thumbnail"
+    FAILED = "failed"
+
+
+class ProcessingJob(BaseModel):
+    """
+    Model for processing jobs stored in the 'processing_jobs' collection.
+
+    Tracks the lifecycle and status of document processing tasks.
+    """
+
+    id: Optional[PyObjectId] = Field(default=None, alias="_id")
+    document_id: PyObjectId = Field(..., description="Reference to file document")
+    status: ProcessingStatus = Field(default=ProcessingStatus.PENDING, description="Current processing status")
+    task_id: Optional[str] = Field(default=None, description="Celery task UUID")
+    created_at: Optional[datetime] = Field(default=None, description="Timestamp when job was created")
+    started_at: Optional[datetime] = Field(default=None, description="Timestamp when processing started")
+    completed_at: Optional[datetime] = Field(default=None, description="Timestamp when processing completed")
+    error_message: Optional[str] = Field(default=None, description="Error message if processing failed")
+
+    @field_validator('error_message')
+    @classmethod
+    def validate_error_message(cls, v: Optional[str]) -> Optional[str]:
+        """Clean up error message."""
+        if v is not None:
+            return v.strip() if v.strip() else None
+        return v
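The relocated enum adds intermediate states (saving_object, saving_pdf, creating_thumbnail) between processing and completed. The diff does not spell out which transitions are legal, so the table below is only an illustrative guess at how a service might validate them with `InvalidStatusTransitionError` from the exceptions module above:

```python
# Illustrative transition guard; the actual transition table is not in this diff.
from app.exceptions.job_exceptions import InvalidStatusTransitionError
from app.models.job import ProcessingStatus as PS

ALLOWED = {
    PS.PENDING: {PS.PROCESSING, PS.FAILED},
    PS.PROCESSING: {PS.SAVING_OBJECT, PS.FAILED},
    PS.SAVING_OBJECT: {PS.SAVING_PDF, PS.FAILED},
    PS.SAVING_PDF: {PS.CREATING_THUMBNAIL, PS.FAILED},
    PS.CREATING_THUMBNAIL: {PS.COMPLETED, PS.FAILED},
}


def check_transition(current: PS, target: PS) -> None:
    # Raise if the requested move is not in the (assumed) allowed set
    if target not in ALLOWED.get(current, set()):
        raise InvalidStatusTransitionError(current, target)
```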
@@ -7,10 +7,10 @@ and API responses with proper validation and type safety.

 import re
 from datetime import datetime
-from typing import Optional, Any
+from typing import Optional

 from bson import ObjectId
 from pydantic import BaseModel, Field, field_validator, EmailStr
-from pydantic_core import core_schema
+
 from app.models.auth import UserRole
 from app.models.types import PyObjectId
@@ -105,6 +105,7 @@ class UserUpdate(BaseModel):
     password: Optional[str] = None
     role: Optional[UserRole] = None
     is_active: Optional[bool] = None
+    preferences: Optional[dict] = None

     @field_validator('username')
     @classmethod
@@ -130,24 +131,7 @@ class UserInDB(BaseModel):
     hashed_password: str
     role: UserRole
     is_active: bool = True
-    created_at: datetime
-    updated_at: datetime
-
-    model_config = {
-        "populate_by_name": True,
-        "arbitrary_types_allowed": True,
-        "json_encoders": {ObjectId: str}
-    }
-
-
-class UserResponse(BaseModel):
-    """Model for user data in API responses (excludes password_hash)."""
-
-    id: PyObjectId = Field(alias="_id")
-    username: str
-    email: str
-    role: UserRole
-    is_active: bool
+    preferences: dict = Field(default_factory=dict)
     created_at: datetime
     updated_at: datetime
@@ -4,7 +4,11 @@ Authentication service for password hashing and verification.
 This module provides authentication-related functionality including
 password hashing, verification, and JWT token management.
 """
-
+from datetime import datetime, timedelta
+
+import jwt
+
+from app.config import settings
 from app.utils.security import hash_password, verify_password
@@ -55,4 +59,26 @@ class AuthService:
         >>> auth.verify_user_password("wrongpassword", hashed)
         False
         """
         return verify_password(password, hashed_password)
+
+    @staticmethod
+    def create_access_token(data: dict) -> str:
+        """
+        Create a JWT access token.
+
+        Args:
+            data (dict): Payload data to include in the token.
+
+        Returns:
+            str: Encoded JWT token.
+        """
+        # Copy data to avoid modifying the original dict
+        to_encode = data.copy()
+
+        # Add expiration time
+        expire = datetime.now() + timedelta(hours=settings.get_jwt_expire_hours())
+        to_encode.update({"exp": expire})
+
+        # Encode JWT
+        encoded_jwt = jwt.encode(to_encode, settings.get_jwt_secret_key(), algorithm=settings.get_jwt_algorithm())
+        return encoded_jwt
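The new `create_access_token` covers only the encoding side. A sketch of the matching verification step with PyJWT, assuming the same settings accessors are available; `jwt.decode` verifies the `exp` claim added above and raises `jwt.ExpiredSignatureError` once it has passed:

```python
# Decoding counterpart (sketch); reuses the settings accessors shown above.
import jwt

from app.config import settings


def decode_access_token(token: str) -> dict:
    # Raises jwt.ExpiredSignatureError / jwt.InvalidTokenError on bad tokens
    return jwt.decode(
        token,
        settings.get_jwt_secret_key(),
        algorithms=[settings.get_jwt_algorithm()],
    )
```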
@@ -6,24 +6,38 @@ while maintaining data consistency through MongoDB transactions.
 """

 import hashlib
-import magic
+import logging
+import os
+import shutil
 from datetime import datetime
 from pathlib import Path
-from typing import List, Optional, Dict, Any, Tuple
+from typing import List, Optional, Dict, Any

-from motor.motor_asyncio import AsyncIOMotorClientSession
+import magic
 from pymongo.errors import PyMongoError

-from app.database.connection import get_database
+from app.config.settings import get_objects_folder, get_temp_folder, get_errors_folder, get_ignored_folder
 from app.database.repositories.document_repository import FileDocumentRepository
-from app.database.repositories.document_content_repository import DocumentContentRepository
 from app.models.document import (
     FileDocument,
-    DocumentContent,
     FileType,
-    ProcessingStatus
 )
 from app.models.types import PyObjectId
+from app.utils.pdf_converter import convert_to_pdf
+from app.utils.pdf_thumbmail import PDFThumbnailGenerator
+from app.utils.security import generate_uuid_filename
+
+logger = logging.getLogger(__name__)
+
+
+class DocumentAlreadyExists(Exception):
+    def __init__(self, message):
+        self.message = message
+
+
+class DocumentProcessingError(Exception):
+    def __init__(self, message):
+        self.message = message
+
+
 class DocumentService:
@@ -34,13 +48,32 @@ class DocumentService:
     and their content while ensuring data consistency through transactions.
     """

-    def __init__(self):
-        """Initialize the document service with repository dependencies."""
-        self.db = get_database()
-        self.file_repository = FileDocumentRepository(self.db)
-        self.content_repository = DocumentContentRepository(self.db)
+    def __init__(self, database,
+                 objects_folder: str = None,
+                 temp_folder: str = None,
+                 errors_folder: str = None,
+                 ignored_folder: str = None):
+        """
+        Initialize the document service with repository dependencies.
+
+        Args:
+            database: Database instance
+            objects_folder: folder to store files by their hash
+        """
+
+        self.db = database
+        self.document_repository = FileDocumentRepository(self.db)
+        self.objects_folder = objects_folder or get_objects_folder()
+        self.temp_folder = temp_folder or get_temp_folder()
+        self.errors_folder = errors_folder or get_errors_folder()
+        self.ignored_folder = ignored_folder or get_ignored_folder()
+
+    def initialize(self):
+        self.document_repository.initialize()
+        return self

-    def _calculate_file_hash(self, file_bytes: bytes) -> str:
+    @staticmethod
+    def _calculate_file_hash(file_bytes: bytes) -> str:
         """
         Calculate SHA256 hash of file content.
@@ -52,7 +85,8 @@ class DocumentService:
         """
         return hashlib.sha256(file_bytes).hexdigest()

-    def _detect_file_type(self, file_path: str) -> FileType:
+    @staticmethod
+    def _detect_file_type(file_path: str) -> FileType:
         """
         Detect file type from file extension.
@@ -72,7 +106,8 @@ class DocumentService:
         except ValueError:
             raise ValueError(f"Unsupported file type: {extension}")

-    def _detect_mime_type(self, file_bytes: bytes) -> str:
+    @staticmethod
+    def _detect_mime_type(file_bytes: bytes) -> str:
         """
         Detect MIME type from file content.
@@ -84,10 +119,102 @@ class DocumentService:
         """
        return magic.from_buffer(file_bytes, mime=True)

-    async def create_document(
+    @staticmethod
+    def _read_file_bytes(file_path: str | Path) -> bytes:
+        """
+        Read file content as bytes.
+
+        Args:
+            file_path (str | Path): Path of the file to read
+
+        Returns:
+            bytes: Content of the file
+
+        Raises:
+            FileNotFoundError: If the file does not exist
+            OSError: If any I/O error occurs
+        """
+        path = Path(file_path)
+
+        if not path.exists():
+            raise FileNotFoundError(f"File not found: {file_path}")
+
+        return path.read_bytes()
+
+    @staticmethod
+    def _get_safe_path(file_path):
+        """
+        If the path already exists, add a suffix to the filename.
+        Increment the suffix until a safe path is found.
+        :param file_path:
+        :return:
+        """
+        path = Path(file_path)
+
+        # If the path doesn't exist, return it as is
+        if not path.exists():
+            return file_path
+
+        # Split the filename and extension
+        stem = path.stem
+        suffix = path.suffix
+        directory = path.parent
+
+        # Try incrementing numbers until a unique path is found
+        counter = 1
+        while True:
+            # Create new filename with counter
+            new_filename = f"{stem}_{counter}{suffix}"
+            new_path = os.path.join(directory, new_filename)
+
+            # Check if this new path exists
+            if not os.path.exists(new_path):
+                return new_path
+
+            # Increment counter for next attempt
+            counter += 1
+
+    def get_document_path(self, file_hash):
+        """
+        Return the content-addressed storage path for the given file hash.
+        :param file_hash:
+        :return:
+        """
+        return os.path.join(self.objects_folder, file_hash[:24], file_hash)
+
+    def exists(self, file_hash):
+        if file_hash is None:
+            return False
+        return os.path.exists(self.get_document_path(file_hash))
+
+    def save_content_if_needed(self, file_hash, content: bytes):
+        target_path = self.get_document_path(file_hash)
+        if os.path.exists(target_path):
+            return
+
+        if not os.path.exists(os.path.dirname(target_path)):
+            os.makedirs(os.path.dirname(target_path))
+
+        with open(target_path, "wb") as f:
+            f.write(content)
+
+    def move_to_errors(self, document_id, file_path):
+        logger.info(f"Moving file {file_path} to error folder")
+        error_file_name = f"{document_id}_{os.path.basename(file_path)}"
+        error_file_path = self._get_safe_path(os.path.join(self.errors_folder, error_file_name))
+        shutil.move(file_path, error_file_path)
+
+    def move_to_ignored(self, file_path, reason="Unknown"):
+        logger.info(f"Moving file {file_path} to ignored folder")
+        ignored_file_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + f"_### {reason} ###_" + os.path.basename(
+            file_path)
+        ignored_file_path = self._get_safe_path(os.path.join(self.ignored_folder, ignored_file_name))
+        shutil.move(file_path, ignored_file_path)
+
+    def create_document(
         self,
         file_path: str,
-        file_bytes: bytes,
+        file_bytes: bytes | None = None,
         encoding: str = "utf-8"
     ) -> FileDocument:
         """
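The helpers above amount to a small content-addressed object store: every blob lives under `objects_folder` at a path derived purely from its SHA-256 hash, with the first 24 hex characters reused as a sharding subdirectory. A worked example of the resulting layout; the `objects_folder` value is an illustrative assumption:

```python
# Worked example of the content-addressed layout (paths are illustrative).
import hashlib

content = b"hello world"
file_hash = hashlib.sha256(content).hexdigest()
# file_hash == "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"
print(file_hash[:24])   # "b94d27b9934d3e08a52e52d7" -> sharding subdirectory

# With objects_folder="/data/objects", get_document_path(file_hash) yields:
# /data/objects/b94d27b9934d3e08a52e52d7/b94d27b9934d3e08a52e52d7da7dab...cde9
```

Because the path depends only on the content, `save_content_if_needed` gets deduplication for free: two identical uploads map to the same path, and the second write is skipped.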
@@ -110,57 +237,125 @@ class DocumentService:
             PyMongoError: If database operation fails
         """
         # Calculate automatic attributes
+        file_bytes = file_bytes if file_bytes is not None else self._read_file_bytes(file_path)
         file_hash = self._calculate_file_hash(file_bytes)
         file_type = self._detect_file_type(file_path)
         mime_type = self._detect_mime_type(file_bytes)
         file_size = len(file_bytes)
         filename = Path(file_path).name
-        detected_at = datetime.utcnow()
+        detected_at = datetime.now()

-        # Start MongoDB transaction
-        async with await self.db.client.start_session() as session:
-            async with session.start_transaction():
-                try:
-                    # Check if content already exists
-                    existing_content = await self.content_repository.find_document_content_by_file_hash(
-                        file_hash, session=session
-                    )
-
-                    # Create DocumentContent if it doesn't exist
-                    if not existing_content:
-                        content_data = DocumentContent(
-                            file_hash=file_hash,
-                            content="",  # Will be populated by processing workers
-                            encoding=encoding,
-                            file_size=file_size,
-                            mime_type=mime_type
-                        )
-                        await self.content_repository.create_document_content(
-                            content_data, session=session
-                        )
-
-                    # Create FileDocument
-                    file_data = FileDocument(
-                        filename=filename,
-                        filepath=file_path,
-                        file_type=file_type,
-                        extraction_method=None,  # Will be set by processing workers
-                        metadata={},  # Empty for now
-                        detected_at=detected_at,
-                        file_hash=file_hash
-                    )
-
-                    created_file = await self.file_repository.create_document(
-                        file_data, session=session
-                    )
-
-                    return created_file
-
-                except Exception as e:
-                    # Transaction will automatically rollback
-                    raise PyMongoError(f"Failed to create document: {str(e)}")
+        try:
+            logger.info(f'Creating Document for "{file_path}"')
+            # Skip the document if it already exists
+            same_document = self.document_repository.find_same_document(filename, file_hash)
+            if same_document is not None:
+                logger.info(f"  Document with same hash already exists. Skipping...")
+                self.move_to_ignored(file_path, f"already exists ({same_document.id})")
+                raise DocumentAlreadyExists(f"Document with same hash already exists ({same_document.id})")
+
+            self.save_content_if_needed(file_hash, file_bytes)
+            logger.info(f"  Saved content to {self.get_document_path(file_hash)}")
+
+            # Create FileDocument
+            file_data = FileDocument(
+                filename=filename,
+                filepath=file_path,
+                file_type=file_type,
+                extraction_method=None,  # Will be set by processing workers
+                metadata={},  # Empty for now
+                detected_at=detected_at,
+                file_hash=file_hash,
+                encoding=encoding,
+                file_size=file_size,
+                mime_type=mime_type
+            )
+
+            created_document = self.document_repository.create_document(file_data)
+            logger.info(f"  Created document with id '{created_document.id}'")
+
+            return created_document
+
+        except DocumentAlreadyExists as e:
+            raise e
+        except Exception as e:
+            # Transaction will automatically rollback if supported
+            raise PyMongoError(f"Failed to create document: {str(e)}")

-    async def get_document_by_id(self, document_id: PyObjectId) -> Optional[FileDocument]:
+    def create_pdf(self, document_id: PyObjectId):
+        """
+        For all files, a controlled pdf version will be created for standard visualization and action
+        :return:
+        """
+        logger.info(f"Creating PDF document for {document_id}")
+        document = self.get_document_by_id(document_id)
+        if document is None:
+            logger.error(f"  Document not found")
+            raise DocumentProcessingError(f"Document {document_id} not found.")
+
+        # try to find another document that has the same hash
+        document_with_same_hash = self.get_document_with_pdf_hash(document.file_hash)
+
+        # the pdf will be created only if it does not exist yet
+        if document_with_same_hash and self.exists(document_with_same_hash.pdf_file_hash):
+            logger.info(f'Found document with same hash. Will use pdf "{document_with_same_hash.pdf_file_hash}".')
+            self.update_document(document_id, {"pdf_file_hash": document_with_same_hash.pdf_file_hash})
+            return
+
+        # get the content of the file
+        logger.info(f"  No document with same hash and valid pdf found. Will create new pdf content.")
+        file_bytes = self.get_document_content_by_hash(document.file_hash)
+        if file_bytes is None:
+            logger.error(f'Content for document "{document_id}" not found. hash = "{document.file_hash}".')
+            raise DocumentProcessingError(f'Content for document "{document_id}" not found. hash = "{document.file_hash}".')
+
+        # create the pdf file
+        temp_pdf_file = convert_to_pdf(self.get_document_path(document.file_hash), self.temp_folder)
+        pdf_file_hash = self._calculate_file_hash(self._read_file_bytes(temp_pdf_file))
+        self.save_content_if_needed(pdf_file_hash, self._read_file_bytes(temp_pdf_file))
+        os.remove(temp_pdf_file)  # remove the temporary file
+        logger.info(f'  Created new pdf file with hash "{pdf_file_hash}"')
+
+        # update the document
+        self.update_document(document_id, {"pdf_file_hash": pdf_file_hash})
+
+    def create_thumbnail(self, document_id: PyObjectId):
+        logger.info(f'Creating thumbnail document for "{document_id}"')
+        document = self.get_document_by_id(document_id)
+        if document is None:
+            logger.error(f"  Document not found !")
+            raise DocumentProcessingError(f"Document {document_id} not found.")
+
+        # try to find another document that has the same hash
+        document_with_same_hash = self.get_document_with_pdf_hash(document.file_hash)
+
+        # We will use the thumbnail of the pdf if it exists
+        if document_with_same_hash and self.exists(document_with_same_hash.thumbnail_file_hash):
+            logger.info(f"  Found document with same hash. Will use thumbnail {document_with_same_hash.thumbnail_file_hash}")
+            self.update_document(document_id, {"thumbnail_file_hash": document_with_same_hash.thumbnail_file_hash})
+            return
+
+        logger.info(f"  No document with same hash and valid thumbnail found. Will create new thumbnail")
+
+        if not self.exists(document.pdf_file_hash):
+            logger.error(f"  PDF file not found.")
+            raise DocumentProcessingError(f"PDF file for document {document_id} not found")
+
+        tmp_thumbnail_path = os.path.join(self.temp_folder, f"{generate_uuid_filename()}.png")
+        with PDFThumbnailGenerator(self.get_document_path(document.pdf_file_hash)) as gen:
+            # create the thumbnail
+            gen.create_thumbnail(tmp_thumbnail_path, page_num=0, width=200)
+            thumbnail_file_hash = self._calculate_file_hash(self._read_file_bytes(tmp_thumbnail_path))
+
+            # save the thumbnail to the objects folder
+            self.save_content_if_needed(thumbnail_file_hash, self._read_file_bytes(tmp_thumbnail_path))
+            os.remove(tmp_thumbnail_path)
+
+            # update the document
+            self.update_document(document_id, {"thumbnail_file_hash": thumbnail_file_hash})
+            logger.info(f"  Created thumbnail {thumbnail_file_hash}")
+
+    def get_document_by_id(self, document_id: PyObjectId) -> Optional[FileDocument]:
         """
         Retrieve a document by its ID.
@@ -170,9 +365,9 @@ class DocumentService:
         Returns:
             FileDocument if found, None otherwise
         """
-        return await self.file_repository.find_document_by_id(document_id)
+        return self.document_repository.find_document_by_id(str(document_id))

-    async def get_document_by_hash(self, file_hash: str) -> Optional[FileDocument]:
+    def get_document_by_hash(self, file_hash: str) -> Optional[FileDocument]:
         """
         Retrieve a document by its file hash.
@@ -182,9 +377,12 @@ class DocumentService:
         Returns:
             FileDocument if found, None otherwise
         """
-        return await self.file_repository.find_document_by_hash(file_hash)
+        return self.document_repository.find_document_by_hash(file_hash)

-    async def get_document_by_filepath(self, filepath: str) -> Optional[FileDocument]:
+    def get_document_with_pdf_hash(self, file_hash) -> Optional[FileDocument]:
+        return self.document_repository.find_document_with_pdf_hash(file_hash)
+
+    def get_document_by_filepath(self, filepath: str) -> Optional[FileDocument]:
         """
         Retrieve a document by its file path.
@@ -194,34 +392,17 @@ class DocumentService:
         Returns:
             FileDocument if found, None otherwise
         """
-        return await self.file_repository.find_document_by_filepath(filepath)
+        return self.document_repository.find_document_by_filepath(filepath)

-    async def get_document_with_content(
-        self,
-        document_id: PyObjectId
-    ) -> Optional[Tuple[FileDocument, DocumentContent]]:
-        """
-        Retrieve a document with its associated content.
-
-        Args:
-            document_id: Document ObjectId
-
-        Returns:
-            Tuple of (FileDocument, DocumentContent) if found, None otherwise
-        """
-        document = await self.get_document_by_id(document_id)
-        if not document:
+    def get_document_content_by_hash(self, file_hash):
+        target_path = self.get_document_path(file_hash)
+        if not os.path.exists(target_path):
             return None

-        content = await self.content_repository.find_document_content_by_file_hash(
-            document.file_hash
-        )
-        if not content:
-            return None
-
-        return (document, content)
+        with open(target_path, "rb") as f:
+            return f.read()

-    async def list_documents(
+    def list_documents(
         self,
         skip: int = 0,
         limit: int = 100
@@ -236,18 +417,18 @@ class DocumentService:
         Returns:
             List of FileDocument instances
         """
-        return await self.file_repository.list_documents(skip=skip, limit=limit)
+        return self.document_repository.list_documents(skip=skip, limit=limit)

-    async def count_documents(self) -> int:
+    def count_documents(self) -> int:
         """
         Get total number of documents.

         Returns:
             Total document count
         """
-        return await self.file_repository.count_documents()
+        return self.document_repository.count_documents()

-    async def update_document(
+    def update_document(
         self,
         document_id: PyObjectId,
         update_data: Dict[str, Any]
@@ -262,9 +443,14 @@ class DocumentService:
         Returns:
             Updated FileDocument if found, None otherwise
         """
-        return await self.file_repository.update_document(document_id, update_data)
+        if "file_bytes" in update_data:
+            file_hash = self._calculate_file_hash(update_data["file_bytes"])
+            update_data["file_hash"] = file_hash
+            self.save_content_if_needed(file_hash, update_data["file_bytes"])
+
+        return self.document_repository.update_document(document_id, update_data)

-    async def delete_document(self, document_id: PyObjectId) -> bool:
+    def delete_document(self, document_id: PyObjectId) -> bool:
         """
         Delete a document and its orphaned content.
@@ -281,100 +467,31 @@ class DocumentService:
         Raises:
             PyMongoError: If database operation fails
         """
-        # Start MongoDB transaction
-        async with await self.db.client.start_session() as session:
-            async with session.start_transaction():
-                try:
-                    # Get document to find its hash
-                    document = await self.file_repository.find_document_by_id(
-                        document_id, session=session
-                    )
-                    if not document:
-                        return False
-
-                    # Delete the document
-                    deleted = await self.file_repository.delete_document(
-                        document_id, session=session
-                    )
-                    if not deleted:
-                        return False
-
-                    # Check if content is orphaned
-                    remaining_files = await self.file_repository.find_document_by_hash(
-                        document.file_hash, session=session
-                    )
-
-                    # If no other files reference this content, delete it
-                    if not remaining_files:
-                        content = await self.content_repository.find_document_content_by_file_hash(
-                            document.file_hash, session=session
-                        )
-                        if content:
-                            await self.content_repository.delete_document_content(
-                                content.id, session=session
-                            )
-
-                    return True
-
-                except Exception as e:
-                    # Transaction will automatically rollback
-                    raise PyMongoError(f"Failed to delete document: {str(e)}")
-
-    async def content_exists(self, file_hash: str) -> bool:
-        """
-        Check if content with given hash exists.
-
-        Args:
-            file_hash: SHA256 hash of file content
-
-        Returns:
-            True if content exists, False otherwise
-        """
-        return await self.content_repository.content_exists(file_hash)
-
-    async def get_content_by_hash(self, file_hash: str) -> Optional[DocumentContent]:
-        """
-        Retrieve content by file hash.
-
-        Args:
-            file_hash: SHA256 hash of file content
-
-        Returns:
-            DocumentContent if found, None otherwise
-        """
-        return await self.content_repository.find_document_content_by_file_hash(file_hash)
-
-    async def update_document_content(
-        self,
-        file_hash: str,
-        content: str,
-        encoding: str = "utf-8"
-    ) -> Optional[DocumentContent]:
-        """
-        Update the extracted content for a document.
-
-        This method is typically called by processing workers to store
-        the extracted text content.
-
-        Args:
-            file_hash: SHA256 hash of file content
-            content: Extracted text content
-            encoding: Character encoding
-
-        Returns:
-            Updated DocumentContent if found, None otherwise
-        """
-        existing_content = await self.content_repository.find_document_content_by_file_hash(
-            file_hash
-        )
-        if not existing_content:
-            return None
-
-        update_data = {
-            "content": content,
-            "encoding": encoding
-        }
-
-        return await self.content_repository.update_document_content(
-            existing_content.id, update_data
-        )
+        # Start transaction
+        try:
+            # Get document to find its hash
+            document = self.document_repository.find_document_by_id(document_id)
+            if not document:
+                return False
+
+            # Delete the document
+            deleted = self.document_repository.delete_document(document_id)
+            if not deleted:
+                return False
+
+            # Check if content is orphaned
+            remaining_files = self.document_repository.find_document_by_hash(document.file_hash)
+
+            # If no other files reference this content, delete it
+            if not remaining_files:
+                try:
+                    os.remove(self.get_document_path(document.file_hash))
+                except Exception:
+                    pass
+
+            return True
+
+        except Exception as e:
+            # Transaction will automatically rollback if supported
+            raise PyMongoError(f"Failed to delete document: {str(e)}")
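Both the update and delete paths lean on helpers this diff references but does not show (`_calculate_file_hash`, `get_document_path`, `save_content_if_needed`). A minimal sketch of the first two, assuming SHA-256 content addressing under a flat storage directory; the names appear in the diff, but these bodies and the storage location are assumptions:

```python
import hashlib
from pathlib import Path

STORAGE_DIR = Path("volumes/documents")  # assumed storage root, not confirmed by the diff

def _calculate_file_hash(file_bytes: bytes) -> str:
    # The docstrings elsewhere call this a "SHA256 hash of file content"
    return hashlib.sha256(file_bytes).hexdigest()

def get_document_path(file_hash: str) -> Path:
    # Map a content hash to its on-disk location
    return STORAGE_DIR / file_hash
```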
@@ -8,8 +8,8 @@ creating default admin user if none exists.
 import logging
 from typing import Optional

-from app.models.user import UserCreate, UserInDB, UserCreateNoValidation
 from app.models.auth import UserRole
+from app.models.user import UserInDB, UserCreateNoValidation
 from app.services.user_service import UserService

 logger = logging.getLogger(__name__)
@@ -31,7 +31,6 @@ class InitializationService:
             user_service (UserService): Service for user operations
         """
         self.user_service = user_service
-

     def ensure_admin_user_exists(self) -> Optional[UserInDB]:
         """
@@ -131,4 +130,23 @@ class InitializationService:
                 logger.error(error_msg)
                 initialization_summary["errors"].append(error_msg)

-        return initialization_summary
+        self.log_initialization_result(initialization_summary)
+
+        return initialization_summary
+
+    @staticmethod
+    def log_initialization_result(summary: dict) -> None:
+        """
+        Log the result of the initialization process.
+
+        Args:
+            summary (dict): Summary of initialization tasks performed
+        """
+        if summary["initialization_success"]:
+            logger.info("Application startup completed successfully")
+            if summary["admin_user_created"]:
+                logger.info("Default admin user was created during startup")
+        else:
+            logger.error("Application startup completed with errors:")
+            for error in summary["errors"]:
+                logger.error(f"  - {error}")
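`log_initialization_result` reads three keys from the summary dict, so the shape it expects is at least:

```python
# Shape implied by the keys used in log_initialization_result
initialization_summary = {
    "initialization_success": True,   # overall outcome
    "admin_user_created": False,      # whether a default admin was provisioned
    "errors": [],                     # collected error messages
}
```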
src/file-processor/app/services/job_service.py (new file, 189 lines)

"""
Service layer for job processing business logic.

This module provides high-level operations for managing processing jobs
with strict status transition validation and business rules enforcement.
"""

from typing import Optional

from app.database.repositories.job_repository import JobRepository
from app.exceptions.job_exceptions import InvalidStatusTransitionError
from app.models.job import ProcessingJob, ProcessingStatus
from app.models.types import PyObjectId


class JobService:
    """
    Service for processing job business logic operations.

    Provides high-level job management with strict status transition
    validation and business rule enforcement.
    """

    def __init__(self, database):
        """
        Initialize service with job repository.

        Args:
            database: MongoDB database handle used to build the JobRepository
        """
        self.db = database
        self.repository = JobRepository(database)

    def initialize(self):
        self.repository.initialize()
        return self

    def create_job(self, document_id: PyObjectId, task_id: Optional[str] = None) -> ProcessingJob:
        """
        Create a new processing job.

        Args:
            document_id: Reference to the file document
            task_id: Optional Celery task UUID

        Returns:
            The created ProcessingJob

        Raises:
            JobRepositoryError: If database operation fails
        """
        return self.repository.create_job(document_id, task_id)

    def get_job_by_id(self, job_id: PyObjectId) -> ProcessingJob:
        """
        Retrieve a job by its ID.

        Args:
            job_id: The job ObjectId

        Returns:
            The ProcessingJob document

        Raises:
            JobNotFoundError: If job doesn't exist
            JobRepositoryError: If database operation fails
        """
        return self.repository.find_job_by_id(job_id)

    def mark_job_as_started(self, job_id: PyObjectId) -> ProcessingJob:
        """
        Mark a job as started (PENDING → PROCESSING).

        Args:
            job_id: The job ObjectId

        Returns:
            The updated ProcessingJob

        Raises:
            JobNotFoundError: If job doesn't exist
            InvalidStatusTransitionError: If job is not in PENDING status
            JobRepositoryError: If database operation fails
        """
        # Get current job to validate transition
        current_job = self.repository.find_job_by_id(job_id)

        # Validate status transition
        if current_job.status != ProcessingStatus.PENDING:
            raise InvalidStatusTransitionError(current_job.status, ProcessingStatus.PROCESSING)

        # Update status
        return self.repository.update_job_status(job_id, ProcessingStatus.PROCESSING)

    def mark_job_as_completed(self, job_id: PyObjectId) -> ProcessingJob:
        """
        Mark a job as completed (PROCESSING → COMPLETED).

        Args:
            job_id: The job ObjectId

        Returns:
            The updated ProcessingJob

        Raises:
            JobNotFoundError: If job doesn't exist
            InvalidStatusTransitionError: If job is not in PROCESSING status
            JobRepositoryError: If database operation fails
        """
        # Get current job to validate transition
        current_job = self.repository.find_job_by_id(job_id)

        # Validate status transition
        if current_job.status in (ProcessingStatus.PENDING,
                                  ProcessingStatus.COMPLETED,
                                  ProcessingStatus.FAILED):
            raise InvalidStatusTransitionError(current_job.status, ProcessingStatus.COMPLETED)

        # Update status
        return self.repository.update_job_status(job_id, ProcessingStatus.COMPLETED)

    def mark_job_as_failed(
            self,
            job_id: PyObjectId,
            error_message: Optional[str] = None
    ) -> ProcessingJob:
        """
        Mark a job as failed (PROCESSING → FAILED).

        Args:
            job_id: The job ObjectId
            error_message: Optional error description

        Returns:
            The updated ProcessingJob

        Raises:
            JobNotFoundError: If job doesn't exist
            InvalidStatusTransitionError: If job is not in PROCESSING status
            JobRepositoryError: If database operation fails
        """
        # Get current job to validate transition
        current_job = self.repository.find_job_by_id(job_id)

        # Validate status transition
        if current_job.status in (ProcessingStatus.PENDING, ProcessingStatus.COMPLETED, ProcessingStatus.FAILED):
            raise InvalidStatusTransitionError(current_job.status, ProcessingStatus.FAILED)

        # Update status with error message
        return self.repository.update_job_status(
            job_id,
            ProcessingStatus.FAILED,
            error_message
        )

    def update_job_status(self, job_id: PyObjectId,
                          status: ProcessingStatus,
                          error_message: Optional[str] = None) -> ProcessingJob:
        return self.repository.update_job_status(job_id, status, error_message)

    def delete_job(self, job_id: PyObjectId) -> bool:
        """
        Delete a job from the database.

        Args:
            job_id: The job ObjectId

        Returns:
            True if job was deleted, False if not found

        Raises:
            JobRepositoryError: If database operation fails
        """
        return self.repository.delete_job(job_id)

    def get_jobs_by_status(self, status: ProcessingStatus) -> list[ProcessingJob]:
        """
        Retrieve all jobs with a specific status.

        Args:
            status: The processing status to filter by

        Returns:
            List of ProcessingJob documents

        Raises:
            JobRepositoryError: If database operation fails
        """
        return self.repository.get_jobs_by_status(status)
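The intended lifecycle is PENDING → PROCESSING → COMPLETED/FAILED, with every other transition rejected. A quick sketch, assuming a database handle `db` and a document id are already in hand:

```python
# Hypothetical driver code; `db` and `document_id` come from the caller.
service = JobService(db).initialize()

job = service.create_job(document_id, task_id="celery-task-uuid")
job = service.mark_job_as_started(job.id)    # PENDING -> PROCESSING
job = service.mark_job_as_completed(job.id)  # PROCESSING -> COMPLETED

# Calling mark_job_as_started again would raise InvalidStatusTransitionError.
```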
@@ -6,11 +6,11 @@ retrieval, updates, and authentication operations with proper error handling.
 """

 from typing import Optional, List

 from pymongo.errors import DuplicateKeyError

-from app.models.user import UserCreate, UserInDB, UserUpdate, UserResponse, UserCreateNoValidation
-from app.models.auth import UserRole
 from app.database.repositories.user_repository import UserRepository
+from app.models.user import UserCreate, UserInDB, UserUpdate, UserCreateNoValidation
 from app.services.auth_service import AuthService

@@ -22,16 +22,21 @@ class UserService:
     authentication, and data management with proper validation.
     """

-    def __init__(self, user_repository: UserRepository):
+    def __init__(self, database):
         """
         Initialize user service with repository dependency.

         Args:
-            user_repository (UserRepository): Repository for user data operations
+            database: MongoDB database handle used to build the UserRepository
         """
-        self.user_repository = user_repository
+        self.db = database
+        self.user_repository = UserRepository(self.db)
         self.auth_service = AuthService()

+    def initialize(self):
+        self.user_repository.initialize()
+        return self
+
     def create_user(self, user_data: UserCreate | UserCreateNoValidation) -> UserInDB:
         """
         Create a new user with business logic validation.
@@ -179,3 +184,18 @@ class UserService:
             bool: True if user exists, False otherwise
         """
         return self.user_repository.user_exists(username)
+
+    def get_preference(self, user_id: str, preference):
+        user = self.get_user_by_id(user_id)
+        if user is None:
+            return None
+        return user.preferences.get(preference, None)
+
+    def set_preference(self, user_id: str, preference, value):
+        user = self.get_user_by_id(user_id)
+        if user is None:
+            return None
+
+        user.preferences[preference] = value
+        self.user_repository.update_user(user_id, UserUpdate(preferences=user.preferences))
+        return self.get_user_by_id(user_id)
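The two preference helpers read and write a `preferences` dict on the user model through the normal update path. A hedged usage sketch; `"theme"` is an illustrative key, not one defined by the diff:

```python
# Hypothetical usage of the new preference helpers.
service = UserService(db).initialize()
service.set_preference(user_id, "theme", "dark")
assert service.get_preference(user_id, "theme") == "dark"
```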
src/file-processor/app/utils/pdf_annotation.py (new file, 241 lines)

import fitz  # PyMuPDF


class PDFAnnotator:
    def __init__(self, pdf_path):
        self.doc = fitz.open(pdf_path)

    def add_highlight(self, rect, page_num=0, color=(1, 1, 0)):
        """
        Add highlight annotation

        Args:
            rect: (x0, y0, x1, y1) coordinates or fitz.Rect object
            page_num: Page number (0-indexed), default first page
            color: RGB tuple (0-1 range), default yellow
        """
        page = self.doc[page_num]
        annot = page.add_highlight_annot(rect)
        annot.set_colors(stroke=color)
        annot.update()
        return annot

    def add_rectangle(self, rect, page_num=0, color=(1, 0, 0), width=2):
        """
        Add rectangle annotation (border only)

        Args:
            rect: (x0, y0, x1, y1) coordinates or fitz.Rect object
            page_num: Page number (0-indexed), default first page
            color: RGB tuple (0-1 range), default red
            width: Line width in points
        """
        page = self.doc[page_num]
        annot = page.add_rect_annot(rect)
        annot.set_colors(stroke=color)
        annot.set_border(width=width)
        annot.update()
        return annot

    def add_text_note(self, point, text, page_num=0, icon="Note"):
        """
        Add sticky note annotation

        Args:
            point: (x, y) position tuple
            text: Note content string
            page_num: Page number (0-indexed), default first page
            icon: "Note", "Comment", "Help", "Insert", "Key", etc.
        """
        page = self.doc[page_num]
        annot = page.add_text_annot(point, text, icon=icon)
        annot.update()
        return annot

    def add_free_text(self, rect, text, page_num=0, fontsize=12,
                      color=(0, 0, 0)):
        """
        Add free text annotation (visible text box)

        Args:
            rect: (x0, y0, x1, y1) bounding box tuple or fitz.Rect
            text: Text content string
            page_num: Page number (0-indexed), default first page
            fontsize: Font size in points
            color: Text color RGB tuple (0-1 range)
        """
        page = self.doc[page_num]
        annot = page.add_freetext_annot(
            rect,
            text,
            fontsize=fontsize,
            text_color=color
        )
        annot.update()
        return annot

    def add_arrow(self, start_point, end_point, page_num=0,
                  color=(1, 0, 0), width=2):
        """
        Add arrow annotation

        Args:
            start_point: (x, y) tuple for arrow start
            end_point: (x, y) tuple for arrow end
            page_num: Page number (0-indexed), default first page
            color: Arrow color RGB tuple (0-1 range), default red
            width: Line width in points
        """
        page = self.doc[page_num]
        annot = page.add_line_annot(start_point, end_point)
        annot.set_colors(stroke=color)
        annot.set_border(width=width)
        # Set arrow at end - use integer constant
        annot.set_line_ends(0, 1)  # 1 = ClosedArrow
        annot.update()
        return annot

    def add_stamp(self, rect, page_num=0, stamp_type=0):
        """
        Add stamp annotation

        Args:
            rect: (x0, y0, x1, y1) bounding box tuple or fitz.Rect
            page_num: Page number (0-indexed), default first page
            stamp_type: Integer for stamp type:
                0=Approved, 1=AsIs, 2=Confidential,
                3=Departmental, 4=Draft, 5=Experimental,
                6=Expired, 7=Final, 8=ForComment,
                9=ForPublicRelease, 10=NotApproved, etc.
        """
        page = self.doc[page_num]
        annot = page.add_stamp_annot(rect, stamp=stamp_type)
        annot.update()
        return annot

    def add_redaction(self, rect, page_num=0, fill_color=(0, 0, 0)):
        """
        Add redaction annotation (marks area for redaction)
        Note: Use apply_redactions() to permanently remove content

        Args:
            rect: (x0, y0, x1, y1) area to redact, tuple or fitz.Rect
            page_num: Page number (0-indexed), default first page
            fill_color: RGB tuple (0-1 range) for redacted area, default black
        """
        page = self.doc[page_num]
        annot = page.add_redact_annot(rect, fill=fill_color)
        annot.update()
        return annot

    def apply_redactions(self, page_num=0, images=2, graphics=2, text=2):
        """
        Apply all redaction annotations on a page (permanent removal)

        Args:
            page_num: Page number (0-indexed), default first page
            images: 2=remove, 1=blank, 0=ignore
            graphics: 2=remove, 1=blank, 0=ignore
            text: 2=remove, 1=blank, 0=ignore

        Returns:
            True if redactions were applied, False otherwise
        """
        page = self.doc[page_num]
        # Check if page has redaction annotations
        has_redactions = any(annot.type[0] == 12 for annot in page.annots())

        if has_redactions:
            page.apply_redactions(images=images, graphics=graphics, text=text)
            return True
        return False

    def get_all_annotations(self, page_num=0):
        """
        Retrieve all annotations from a page

        Args:
            page_num: Page number (0-indexed), default first page

        Returns:
            List of dicts with annotation information
        """
        page = self.doc[page_num]
        annotations = []

        for annot in page.annots():
            info = {
                'type': annot.type[1],  # Annotation type name
                'rect': annot.rect,
                'content': annot.info.get('content', ''),
                'author': annot.info.get('title', ''),
                'created': annot.info.get('creationDate', ''),
                'colors': annot.colors
            }
            annotations.append(info)

        return annotations

    def remove_all_annotations(self, page_num=0):
        """
        Remove all annotations from a page

        Args:
            page_num: Page number (0-indexed), default first page
        """
        page = self.doc[page_num]
        for annot in page.annots():
            page.delete_annot(annot)

    def save(self, output_path):
        """Save the annotated PDF"""
        self.doc.save(output_path)

    def close(self):
        self.doc.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()


# Example usage
if __name__ == "__main__":
    with PDFAnnotator("input.pdf") as annotator:
        # Add yellow highlight
        annotator.add_highlight((100, 100, 300, 120), page_num=0,
                                color=(1, 1, 0))

        # Add red rectangle border
        annotator.add_rectangle((100, 150, 300, 250), page_num=0,
                                color=(1, 0, 0), width=3)

        # Add sticky note
        annotator.add_text_note((400, 100), "This is important!",
                                page_num=0, icon="Comment")

        # Add visible text box
        annotator.add_free_text((100, 300, 400, 350), "DRAFT VERSION",
                                page_num=0, fontsize=20, color=(1, 0, 0))

        # Add arrow pointing to something
        annotator.add_arrow((450, 100), (500, 200), page_num=0,
                            color=(0, 0, 1), width=2)

        # Add "Approved" stamp
        annotator.add_stamp((450, 300, 550, 350), page_num=0, stamp_type=0)

        # Add redaction (black box over sensitive info)
        annotator.add_redaction((100, 400, 300, 420), page_num=0)
        annotator.apply_redactions(page_num=0)

        # List all annotations
        annots = annotator.get_all_annotations(page_num=0)
        print(f"Found {len(annots)} annotations:")
        for a in annots:
            print(f"  - {a['type']} at {a['rect']}")

        # Save annotated PDF
        annotator.save("output_annotated.pdf")
src/file-processor/app/utils/pdf_converter.py (new file, 210 lines)

import datetime
import hashlib
import os
import shutil
import uuid
from abc import ABC
from pathlib import Path
from typing import Self

import pikepdf
import pypandoc
from PIL import Image
from reportlab.lib.pagesizes import A4
from reportlab.pdfgen import canvas

from tasks.common.converter_utils import detect_file_type


class BaseConverter(ABC):
    """Abstract base class for file converters to PDF."""

    def __init__(self, input_path: str, output_dir: str = ".") -> None:
        self.input_path = Path(input_path)
        self.output_dir = Path(output_dir)
        self.output_path = self.output_dir / f"{self.generate_uuid_filename()}.pdf"

    def convert(self) -> Self:
        """Convert input file to PDF and return the converter instance."""
        raise NotImplementedError

    @staticmethod
    def generate_uuid_filename() -> str:
        """Generate a unique filename using UUID4."""
        return str(uuid.uuid4())

    def get_deterministic_date(self) -> str:
        """
        Generate a deterministic date based on file content.
        This ensures the same file always produces the same PDF.
        """
        # Option 1: Use a fixed date
        # return "D:20000101000000"

        # Option 2: Generate date from content hash (recommended)
        with open(self.input_path, 'rb') as f:
            content = f.read()
        content_hash = hashlib.sha256(content).hexdigest()

        # Use first 14 characters of hash to create a valid date
        # Format: D:YYYYMMDDHHmmss
        hash_int = int(content_hash[:14], 16)

        # Create a date between 2000-2099 to keep it reasonable
        year = 2000 + (hash_int % 100)
        month = 1 + (hash_int % 12)
        day = 1 + (hash_int % 28)  # Stay safe with 28 days
        hour = hash_int % 24
        minute = hash_int % 60
        second = hash_int % 60

        return f"D:{year:04d}{month:02d}{day:02d}{hour:02d}{minute:02d}{second:02d}"

    def get_file_creation_date(self):
        # Get file creation time (or modification time)
        ts = os.path.getctime(self.input_path)  # getmtime(self.input_path) for last modification
        dt = datetime.datetime.fromtimestamp(ts)

        # PDF expects format D:YYYYMMDDHHmmss
        creation_date = dt.strftime("D:%Y%m%d%H%M%S")
        return creation_date

    def clean_pdf(self) -> Self:
        """Remove all non-deterministic metadata from PDF."""
        with pikepdf.open(self.output_path, allow_overwriting_input=True) as pdf:
            # Remove XMP metadata if it exists
            if hasattr(pdf.Root, 'Metadata'):
                del pdf.Root.Metadata

            # Clear all document info by deleting each key
            for key in list(pdf.docinfo.keys()):
                del pdf.docinfo[key]

            # Set deterministic metadata
            pdf.docinfo["/Producer"] = "MyConverter"
            pdf.docinfo["/Creator"] = "MyConverter"
            pdf.docinfo["/CreationDate"] = self.get_deterministic_date()
            pdf.docinfo["/ModDate"] = self.get_deterministic_date()
            pdf.docinfo["/Title"] = self.input_path.name

            # Save with deterministic IDs
            # compress=True ensures consistent compression
            # deterministic_id=True (if available) or static_id=True
            pdf.save(
                self.output_path,
                fix_metadata_version=True,
                compress_streams=True,
                stream_decode_level=pikepdf.StreamDecodeLevel.generalized,
                object_stream_mode=pikepdf.ObjectStreamMode.disable,
                deterministic_id=True  # Use this if pikepdf >= 8.0.0, otherwise use static_id=True
            )

        return self


class TextToPdfConverter(BaseConverter):
    """Converter for text files to PDF."""

    def convert(self) -> Self:
        c = canvas.Canvas(str(self.output_path), pagesize=A4)

        # Fix metadata with deterministic values
        info = c._doc.info
        info.producer = "MyConverter"
        info.creationDate = self.get_file_creation_date()
        info.title = os.path.basename(self.input_path)

        width, height = A4
        with open(self.input_path, "r", encoding="utf-8") as f:
            y = height - 50
            for line in f:
                c.drawString(50, y, line.strip())
                y -= 15
                if y < 50:
                    c.showPage()
                    y = height - 50

        c.save()
        return self


class PdfToPdfConverter(BaseConverter):
    """Converter for PDF files to PDF."""

    def convert(self) -> Self:
        # Copy self.input_path to self.output_path
        # (shutil is portable and safe; shelling out to `cp` is not)
        shutil.copyfile(self.input_path, self.output_path)
        return self


class ImageToPdfConverter(BaseConverter):
    """Converter for image files to PDF."""

    def convert(self) -> Self:
        image = Image.open(self.input_path)
        rgb_image = image.convert("RGB")
        rgb_image.save(self.output_path)
        return self


class WordToPdfConverter(BaseConverter):
    """Converter for Word files (.docx) to PDF using pypandoc."""

    def convert(self) -> Self:
        pypandoc.convert_file(
            str(self.input_path), "pdf", outputfile=str(self.output_path)
        )
        return self


# Placeholders for future extensions
class HtmlToPdfConverter(BaseConverter):
    """Placeholder for HTML to PDF converter."""

    def convert(self) -> Self:
        raise NotImplementedError("HTML to PDF conversion not implemented.")


class ExcelToPdfConverter(BaseConverter):
    """Placeholder for Excel to PDF converter."""

    def convert(self) -> Self:
        raise NotImplementedError("Excel to PDF conversion not implemented.")


class MarkdownToPdfConverter(BaseConverter):
    """Placeholder for Markdown to PDF converter."""

    def convert(self) -> Self:
        raise NotImplementedError("Markdown to PDF conversion not implemented.")


def convert_to_pdf(filepath: str, output_dir: str = ".") -> str:
    """
    Convert any supported file to PDF.

    Args:
        filepath (str): Path to the input file.
        output_dir (str): Directory to save the output PDF.

    Returns:
        str: Path to the generated PDF.

    Raises:
        ValueError: If the input file type is not supported.
    """
    file_type = detect_file_type(filepath)

    if file_type == "text":
        converter = TextToPdfConverter(filepath, output_dir=output_dir)
    elif file_type == "image":
        converter = ImageToPdfConverter(filepath, output_dir=output_dir)
    elif file_type == "word":
        converter = WordToPdfConverter(filepath, output_dir=output_dir)
    elif file_type == "pdf":
        converter = PdfToPdfConverter(filepath, output_dir=output_dir)
    else:
        raise ValueError(f"Unsupported file type: {file_type}")

    converter.convert()
    converter.clean_pdf()
    return str(converter.output_path)
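`convert_to_pdf` ties the converters together: detect the file type, convert, then strip non-deterministic metadata. A hedged usage sketch with illustrative paths:

```python
# Hypothetical call; the input file and output directory are illustrative.
pdf_path = convert_to_pdf("notes.txt", output_dir="/tmp/pdf")
print(pdf_path)  # e.g. /tmp/pdf/<uuid4>.pdf
```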
src/file-processor/app/utils/pdf_thumbmail.py (new file, 167 lines)

from pathlib import Path

import fitz  # PyMuPDF


class PDFThumbnailGenerator:
    def __init__(self, pdf_path):
        """
        Initialize PDF thumbnail generator

        Args:
            pdf_path: Path to the PDF file (string or Path object)
        """
        self.pdf_path = pdf_path
        self.doc = fitz.open(pdf_path)

    def create_thumbnail(self, output_path, page_num=0, width=200, rotation=0, zoom_factor=1.0):
        """
        Create a thumbnail with zoom and rotation

        Args:
            output_path: Path to save the thumbnail (string or Path)
            page_num: Page number (0-indexed), default first page
            width: Desired width in pixels, default 200
            rotation: Rotation angle in degrees (0, 90, 180, 270), default 0
            zoom_factor: Additional zoom multiplier (1.0 = normal, 2.0 = 2x), default 1.0

        Returns:
            Dict with thumbnail info (width, height, rotation, zoom)
        """
        page = self.doc[page_num]

        # Apply rotation to page
        page.set_rotation(rotation)

        # Calculate zoom to achieve desired width
        base_zoom = width / page.rect.width
        final_zoom = base_zoom * zoom_factor

        # Create transformation matrix
        mat = fitz.Matrix(final_zoom, final_zoom)

        # Render page to pixmap
        pix = page.get_pixmap(matrix=mat, alpha=False)

        # Save thumbnail
        pix.save(output_path)

        return {
            'width': pix.width,
            'height': pix.height,
            'rotation': rotation,
            'zoom': zoom_factor
        }

    def create_cropped_thumbnail(self, output_path, crop_rect=None, page_num=0, width=200):
        """
        Create a thumbnail of a specific region (zoom on area)

        Args:
            output_path: Path to save the thumbnail (string or Path)
            crop_rect: Tuple (x0, y0, x1, y1) in PDF coordinates for cropping,
                       or None for full page, default None
            page_num: Page number (0-indexed), default first page
            width: Desired width in pixels, default 200

        Returns:
            Tuple (width, height) of the generated thumbnail
        """
        page = self.doc[page_num]

        if crop_rect:
            # Create rectangle for cropping
            rect = fitz.Rect(crop_rect)
            zoom = width / rect.width
        else:
            rect = page.rect
            zoom = width / page.rect.width

        mat = fitz.Matrix(zoom, zoom)

        # Render only the specified rectangle
        pix = page.get_pixmap(matrix=mat, clip=rect)
        pix.save(output_path)

        return pix.width, pix.height

    def get_page_info(self, page_num=0):
        """
        Get information about a specific page

        Args:
            page_num: Page number (0-indexed), default first page

        Returns:
            Dict with page information (width, height, rotation, number, total_pages)
        """
        page = self.doc[page_num]
        return {
            'width': page.rect.width,
            'height': page.rect.height,
            'rotation': page.rotation,
            'number': page_num + 1,
            'total_pages': len(self.doc)
        }

    def create_multi_resolution_thumbnails(self, output_folder, page_num=0, sizes=(150, 300, 600)):
        """
        Create multiple thumbnails at different resolutions

        Args:
            output_folder: Folder path to save thumbnails (string or Path)
            page_num: Page number (0-indexed), default first page
            sizes: List of widths in pixels, default [150, 300, 600]

        Returns:
            Dict mapping each size to thumbnail info
        """
        output_folder = Path(output_folder)
        output_folder.mkdir(exist_ok=True, parents=True)

        results = {}
        for size in sizes:
            output_path = output_folder / f"thumb_{size}px.png"
            info = self.create_thumbnail(output_path, page_num=page_num, width=size)
            results[size] = info

        return results

    def close(self):
        """Close the PDF document and free resources"""
        self.doc.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()


# Example usage
if __name__ == "__main__":
    # Basic usage with context manager
    with PDFThumbnailGenerator("example.pdf") as gen:
        # Standard thumbnail
        gen.create_thumbnail("thumb_standard.png", page_num=0, width=200)

        # Rotated thumbnail
        gen.create_thumbnail("thumb_rotated.png", page_num=0,
                             width=200, rotation=90)

        # Zoomed thumbnail (2x zoom)
        gen.create_thumbnail("thumb_zoomed.png", page_num=0,
                             width=200, zoom_factor=2.0)

        # Cropped/zoomed on specific area (x0, y0, x1, y1)
        gen.create_cropped_thumbnail("thumb_crop.png",
                                     crop_rect=(100, 100, 400, 400),
                                     page_num=0, width=300)

        # Multiple resolutions
        gen.create_multi_resolution_thumbnails("thumbnails/", page_num=0,
                                               sizes=[150, 300, 600])

        # Get page information
        info = gen.get_page_info(page_num=0)
        print(f"Page info: {info}")
@@ -4,9 +4,10 @@ Password security utilities using bcrypt for secure password hashing.
 This module provides secure password hashing and verification functions
 using the bcrypt algorithm with automatic salt generation.
 """
+import re
+import uuid

 import bcrypt
-from typing import Union


 def hash_password(password: str) -> str:
@@ -71,4 +72,33 @@ def verify_password(password: str, hashed_password: str) -> bool:
         # bcrypt raises ValueError for malformed hashes
         raise RuntimeError(f"Invalid hash format: {str(e)}")
     except Exception as e:
         raise RuntimeError(f"Failed to verify password: {str(e)}")
+
+
+def generate_uuid_filename() -> str:
+    """Generate a unique filename using UUID4."""
+    return str(uuid.uuid4())
+
+
+def safe_connection_string(connection_string: str) -> str:
+    """
+    Mask the password in a MongoDB connection string.
+
+    Args:
+        connection_string (str): The complete MongoDB connection string
+
+    Returns:
+        str: The connection string with the password replaced by asterisks
+
+    Example:
+        >>> safe_connection_string("mongodb://admin:password123@mongodb:27017/mydocmanager?authSource=admin")
+        'mongodb://admin:*****@mongodb:27017/mydocmanager?authSource=admin'
+    """
+    # Pattern to detect the password in a MongoDB URL
+    # Format: mongodb://username:password@host:port/database
+    pattern = r'(mongodb://[^:]+:)([^@]+)(@.*)'
+
+    # Replace the password with asterisks
+    masked_string = re.sub(pattern, r'\1*****\3', connection_string)
+
+    return masked_string
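Note that the pattern only rewrites URLs that actually contain a `user:password@` section; anything else passes through unchanged, which keeps it safe to apply unconditionally before logging:

```python
# A connection string without credentials is returned as-is:
print(safe_connection_string("mongodb://mongodb:27017/mydocmanager"))
# -> mongodb://mongodb:27017/mydocmanager
```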
@@ -1,11 +1,20 @@
+asgiref==3.9.1
 bcrypt==4.3.0
 celery==5.5.3
 email-validator==2.3.0
 fastapi==0.116.1
 httptools==0.6.4
 motor==3.7.1
-pymongo==4.15.0
+pikepdf==9.11.0
+pillow==11.3.0
 pydantic==2.11.9
+PyJWT==2.10.1
+pymongo==4.15.0
+PyMuPDF==1.26.4
+pypandoc==1.15
+python-multipart==0.0.20
 redis==6.4.0
+reportlab==4.4.4
 uvicorn==0.35.0
 python-magic==0.4.27
+watchdog==6.0.0
src/frontend/.dockerignore (new file, 41 lines)

# Dependencies
node_modules
npm-debug.log*
yarn-debug.log*
yarn-error.log*

# Build outputs
dist
build

# Environment files
.env.local
.env.development.local
.env.test.local
.env.production.local

# IDE files
.vscode
.idea
*.swp
*.swo

# OS generated files
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db

# Git
.git
.gitignore

# Docker
Dockerfile
.dockerignore

# Logs
*.log
src/frontend/Dockerfile (new file, 20 lines)

# Use Node.js 20 Alpine for lightweight container
FROM node:20-alpine

# Set working directory
WORKDIR /app

# Copy package.json and package-lock.json (if available)
COPY package*.json ./

# Install dependencies
RUN npm install

# Copy source code
COPY . .

# Expose Vite default port
EXPOSE 5173

# Start development server with host 0.0.0.0 to accept external connections
CMD ["npm", "run", "dev", "--", "--host", "0.0.0.0", "--port", "5173"]
@@ -1,12 +1,93 @@
-# React + Vite
-
-This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules.
-
-Currently, two official plugins are available:
-
-- [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react) uses [Babel](https://babeljs.io/) for Fast Refresh
-- [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh
-
-## Expanding the ESLint configuration
-
-If you are developing a production application, we recommend using TypeScript with type-aware lint rules enabled. Check out the [TS template](https://github.com/vitejs/vite/tree/main/packages/create-vite/template-react-ts) for information on how to integrate TypeScript and [`typescript-eslint`](https://typescript-eslint.io) in your project.
+# MyDocManager Frontend
+
+## Overview
+MyDocManager Frontend is a modern web application built with React and Vite that serves as the user interface for the MyDocManager document management system. The application provides a seamless experience for users to manage, process, and organize their documents with an intuitive and responsive interface.
+
+## Project Structure
+frontend/
+├── public/              # Public assets and static files
+├── src/                 # Source code
+│   ├── assets/          # Icons, images, and other static assets
+│   ├── components/      # Reusable UI components
+│   │   ├── auth/        # Authentication-related components
+│   │   └── common/      # Shared components (Header, Layout, etc.)
+│   ├── contexts/        # React contexts for state management
+│   ├── hooks/           # Custom React hooks
+│   ├── pages/           # Page components representing full views
+│   ├── services/        # API service interfaces
+│   └── utils/           # Utility functions and helpers
+├── Dockerfile           # Container configuration for deployment
+├── package.json         # Dependencies and scripts
+├── tailwind.config.js   # Tailwind CSS configuration
+└── vite.config.js       # Vite bundler configuration
+
+## Key Components
+
+### Authentication
+- **AuthContext**: Provides authentication state and methods throughout the application
+- **AuthLayout**: Layout wrapper specifically for authentication screens
+- **LoginForm**: Form component for user authentication
+- **ProtectedRoute**: Route guard that ensures authenticated access to protected pages
+
+### UI Components
+- **Layout**: Main application layout structure with menu and content areas
+- **Header**: Application header with navigation and user controls
+- **Menu**: Side navigation menu with application links
+- **ThemeSwitcher**: Toggle for switching between light and dark themes
+
+### Pages
+- **LoginPage**: User authentication page
+- **DashboardPage**: Main dashboard view for authenticated users
+
+### Services
+- **authService**: Handles API communication for authentication operations
+- **api**: Base API utility for making HTTP requests to the backend
+
+## Getting Started
+
+### Prerequisites
+- Node.js (latest LTS version)
+- npm or yarn package manager
+
+### Installation
+1. Clone the repository
+2. Navigate to the frontend directory
+3. Install dependencies:
+```
+npm install
+```
+
+### Development
+Run the development server:
+```
+npm run dev
+```
+This will start the application in development mode at http://localhost:5173
+
+### Building for Production
+Create a production build:
+```
+npm run build
+```
+
+## Technologies
+- React 19.1.1
+- Vite 7.1.2
+- Tailwind CSS 4.1.13
+- DaisyUI 5.1.24
+- React Router 7.9.3
+- Axios for API requests
+
+## Features
+- Responsive design with Tailwind CSS
+- Authentication and authorization
+- Light/dark theme support
+- Document management interface
+- Secure API communication
+
+## Project Integration
+This frontend application works in conjunction with the backend services and workers defined in other parts of the MyDocManager project to provide a complete document management solution.
@@ -1,10 +1,10 @@
 <!doctype html>
-<html lang="en">
+<html lang="en" data-theme="dark">
   <head>
     <meta charset="UTF-8" />
     <link rel="icon" type="image/svg+xml" href="/vite.svg" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <title>Vite + React</title>
+    <title>My Documents Manager</title>
   </head>
   <body>
     <div id="root"></div>
src/frontend/package-lock.json (generated, 1126 lines): diff suppressed because it is too large.
@@ -10,18 +10,26 @@
     "preview": "vite preview"
   },
   "dependencies": {
+    "@tailwindcss/vite": "^4.1.13",
+    "axios": "^1.12.2",
     "react": "^19.1.1",
-    "react-dom": "^19.1.1"
+    "react-dom": "^19.1.1",
+    "react-icons": "^5.5.0",
+    "react-router-dom": "^7.9.3"
   },
   "devDependencies": {
     "@eslint/js": "^9.33.0",
     "@types/react": "^19.1.10",
     "@types/react-dom": "^19.1.7",
     "@vitejs/plugin-react": "^5.0.0",
+    "autoprefixer": "^10.4.21",
+    "daisyui": "^5.1.24",
     "eslint": "^9.33.0",
     "eslint-plugin-react-hooks": "^5.2.0",
     "eslint-plugin-react-refresh": "^0.4.20",
     "globals": "^16.3.0",
+    "postcss": "^8.5.6",
+    "tailwindcss": "^4.1.13",
     "vite": "^7.1.2"
   }
 }
@@ -1,42 +1,6 @@
+@import "tailwindcss";
+
 #root {
   max-width: 1280px;
   margin: 0 auto;
-  padding: 2rem;
-  text-align: center;
 }
-
-.logo {
-  height: 6em;
-  padding: 1.5em;
-  will-change: filter;
-  transition: filter 300ms;
-}
-.logo:hover {
-  filter: drop-shadow(0 0 2em #646cffaa);
-}
-.logo.react:hover {
-  filter: drop-shadow(0 0 2em #61dafbaa);
-}
-
-@keyframes logo-spin {
-  from {
-    transform: rotate(0deg);
-  }
-  to {
-    transform: rotate(360deg);
-  }
-}
-
-@media (prefers-reduced-motion: no-preference) {
-  a:nth-of-type(2) .logo {
-    animation: logo-spin infinite 20s linear;
-  }
-}
-
-.card {
-  padding: 2em;
-}
-
-.read-the-docs {
-  color: #888;
-}
@@ -1,35 +1,36 @@
-import { useState } from 'react'
-import reactLogo from './assets/react.svg'
-import viteLogo from '/vite.svg'
-import './App.css'
+import { BrowserRouter as Router, Routes, Route, Navigate } from 'react-router-dom';
+import { AuthProvider } from './contexts/AuthContext';
+import ProtectedRoute from './components/common/ProtectedRoute';
+import Layout from './components/common/Layout';
+import LoginPage from './pages/LoginPage';
+import DashboardPage from './pages/DashboardPage';
+import DocumentsPage from './pages/DocumentsPage';

 function App() {
-  const [count, setCount] = useState(0)
-
   return (
-    <>
-      <div>
-        <a href="https://vite.dev" target="_blank">
-          <img src={viteLogo} className="logo" alt="Vite logo" />
-        </a>
-        <a href="https://react.dev" target="_blank">
-          <img src={reactLogo} className="logo react" alt="React logo" />
-        </a>
-      </div>
-      <h1>Vite + React</h1>
-      <div className="card">
-        <button onClick={() => setCount((count) => count + 1)}>
-          count is {count}
-        </button>
-        <p>
-          Edit <code>src/App.jsx</code> and save to test HMR
-        </p>
-      </div>
-      <p className="read-the-docs">
-        Click on the Vite and React logos to learn more
-      </p>
-    </>
-  )
+    <AuthProvider>
+      <Router>
+        <div className="App">
+          <Routes>
+            {/* Public Routes */}
+            <Route path="/login" element={<LoginPage />} />
+
+            {/* Protected Routes */}
+            <Route path="/" element={<ProtectedRoute><Layout /></ProtectedRoute>}>
+              <Route index element={<Navigate to="/documents" replace />} />
+              <Route path="documents" element={<DocumentsPage />} />
+              <Route path="dashboard" element={<DashboardPage />} />
+              <Route path="users" element={<div>User Management - Coming Soon</div>} />
+            </Route>
+
+            {/* Catch all route */}
+            <Route path="*" element={<Navigate to="/dashboard" replace />} />
+          </Routes>
+        </div>
+      </Router>
+    </AuthProvider>
+  );
 }

-export default App
+export default App;
src/frontend/src/assets/icons.jsx (new file, 35 lines)

// src/assets/icons.jsx

export const SunIcon = (
    <svg
        xmlns="http://www.w3.org/2000/svg"
        className="h-6 w-6"
        fill="none"
        viewBox="0 0 24 24"
        stroke="currentColor"
    >
        <path
            strokeLinecap="round"
            strokeLinejoin="round"
            strokeWidth="2"
            d="M12 3v1m0 16v1m8.66-9h-1M4.34 12h-1m15.36 6.36l-.7-.7M6.34 6.34l-.7-.7m12.02 12.02l-.7-.7M6.34 17.66l-.7-.7M16 12a4 4 0 11-8 0 4 4 0 018 0z"
        />
    </svg>
);

export const MoonIcon = (
    <svg
        xmlns="http://www.w3.org/2000/svg"
        className="h-6 w-6"
        fill="none"
        viewBox="0 0 24 24"
        stroke="currentColor"
    >
        <path
            strokeLinecap="round"
            strokeLinejoin="round"
            strokeWidth="2"
            d="M21 12.79A9 9 0 1111.21 3a7 7 0 0010.79 9.79z"
        />
    </svg>
);
src/frontend/src/components/auth/AuthLayout.jsx (new file, 41 lines)

import React from 'react';
import ThemeSwitcher from "../common/ThemeSwither.jsx";

/**
 * AuthLayout component for authentication pages
 * Provides centered layout with background and responsive design
 *
 * @param {Object} props - Component props
 * @param {React.ReactNode} props.children - Child components to render
 */

const AuthHeader = () => {
    return (
        <div className="navbar bg-base-100 shadow-lg">
            <div className="navbar-start">
                <h1 className="text-xl font-bold">MyDocManager</h1>
            </div>
            <div className="navbar-end">
                <ThemeSwitcher/>
            </div>
        </div>
    )
}

function AuthLayout({children}) {
    return (
        <div className="min-h-screen bg-gradient-to-br from-primary/10 via-base-200 to-secondary/10">
            <AuthHeader/>
            {/* Main container with flex centering */}
            <div className="min-h-screen flex items-center justify-center p-4">
                {/* Content wrapper for responsive spacing */}
                <div>
                    {children}
                </div>
            </div>

        </div>
    );
}

export default AuthLayout;
202  src/frontend/src/components/auth/LoginForm.jsx  Normal file
@@ -0,0 +1,202 @@
import React, {useEffect, useState} from 'react';
import {useAuth} from '../../contexts/AuthContext';

/**
 * LoginForm component with DaisyUI styling
 * Handles user authentication with form validation and error display
 */
function LoginForm() {
    const {login, loading, error, clearError} = useAuth();
    const [formData, setFormData] = useState({
        username: '',
        password: '',
    });
    const [formErrors, setFormErrors] = useState({});

    // Clear errors when component mounts or form data changes
    useEffect(() => {
        if (error) {
            const timer = setTimeout(() => {
                clearError();
            }, 5000); // Clear error after 5 seconds

            return () => clearTimeout(timer);
        }
    }, [error, clearError]);

    /**
     * Handle input changes and clear related errors
     * @param {Event} e - Input change event
     */
    const handleInputChange = (e) => {
        const {name, value} = e.target;

        setFormData(prev => ({
            ...prev,
            [name]: value,
        }));

        // Clear field error when user starts typing
        if (formErrors[name]) {
            setFormErrors(prev => ({
                ...prev,
                [name]: '',
            }));
        }

        // Clear global error when user modifies form
        if (error) {
            clearError();
        }
    };

    /**
     * Validate form data before submission
     * @returns {boolean} True if form is valid
     */
    const validateForm = () => {
        const errors = {};

        if (!formData.username.trim()) {
            errors.username = 'Username is required';
        }

        if (!formData.password.trim()) {
            errors.password = 'Password is required';
        } else if (formData.password.length < 3) {
            errors.password = 'Password must be at least 3 characters';
        }

        setFormErrors(errors);
        return Object.keys(errors).length === 0;
    };

    /**
     * Handle form submission
     * @param {Event} e - Form submission event
     */
    const handleSubmit = async (e) => {
        e.preventDefault();

        if (!validateForm()) {
            return;
        }

        const success = await login(formData.username, formData.password);

        if (success) {
            // Reset form on successful login
            setFormData({username: '', password: ''});
            setFormErrors({});
        }
    };

    return (
        <div className="card max-w-md shadow-xl bg-base-100">
            <div className="card-body">
                {/* Card Header */}
                <div className="text-center mb-6">
                    <p className="text-base-content/70 mt-2">Sign in to your account</p>
                </div>

                {/* Global Error Alert */}
                {error && (
                    <div className="alert alert-error mb-4">
                        <svg
                            xmlns="http://www.w3.org/2000/svg"
                            className="stroke-current shrink-0 h-6 w-6"
                            fill="none"
                            viewBox="0 0 24 24"
                        >
                            <path
                                strokeLinecap="round"
                                strokeLinejoin="round"
                                strokeWidth="2"
                                d="M10 14l2-2m0 0l2-2m-2 2l-2-2m2 2l2 2m7-2a9 9 0 11-18 0 9 9 0 0118 0z"
                            />
                        </svg>
                        <span>{error}</span>
                    </div>
                )}

                {/* Login Form */}
                <form onSubmit={handleSubmit}>
                    {/* Username Field */}
                    <div id="username">
                        <label className="label">
                            <span className="label-text font-medium">Username</span>
                        </label>
                        <input
                            type="text"
                            name="username"
                            value={formData.username}
                            onChange={handleInputChange}
                            placeholder="Enter your username"
                            className={`input input-bordered w-full ${
                                formErrors.username ? 'input-error' : ''
                            }`}
                            disabled={loading}
                            autoComplete="username"
                        />
                        {formErrors.username && (
                            <label className="label">
                                <span className="label-text-alt text-error">{formErrors.username}</span>
                            </label>
                        )}
                    </div>

                    {/* Password Field */}
                    <div id="password">
                        <label className="label">
                            <span className="label-text font-medium">Password</span>
                        </label>
                        <input
                            type="password"
                            name="password"
                            value={formData.password}
                            onChange={handleInputChange}
                            placeholder="Enter your password"
                            className={`input input-bordered ${
                                formErrors.password ? 'input-error' : ''
                            }`}
                            disabled={loading}
                            autoComplete="current-password"
                        />
                        {formErrors.password && (
                            <label className="label">
                                <span className="label-text-alt text-error">{formErrors.password}</span>
                            </label>
                        )}
                    </div>

                    {/* Submit Button */}
                    <div className="form-control mt-6">
                        <button
                            type="submit"
                            className={`btn btn-primary w-1/3 btn-hover-effect ${loading ? 'loading' : ''}`}
                            disabled={loading}
                        >
                            {loading ? (
                                <>
                                    <span className="loading loading-spinner loading-sm"></span>
                                    Signing in...
                                </>
                            ) : (
                                'Sign In'
                            )}
                        </button>
                    </div>
                </form>

                {/* Additional Info */}
                <div className="text-center mt-4">
                    <p className="text-sm text-base-content/60">
                        Don't have an account? Contact your administrator.
                    </p>
                </div>
            </div>
        </div>
    );
}

export default LoginForm;
49  src/frontend/src/components/common/Header.jsx  Normal file
@@ -0,0 +1,49 @@
import {useAuth} from '../../hooks/useAuth';
import {useNavigate} from 'react-router-dom';
import ThemeSwitcher from "./ThemeSwither.jsx";
import React from "react";

const Header = () => {
    const {user, logout} = useAuth();
    const navigate = useNavigate();

    const handleLogout = async () => {
        await logout();
        navigate('/login');
    };

    return (
        <div className="navbar bg-base-100">
            <div className="navbar-start">
                <h1 className="text-xl font-bold">MyDocManager</h1>
            </div>
            <div className="navbar-end">
                <div className="dropdown dropdown-end">
                    <div tabIndex={0} role="button" className="btn btn-ghost btn-circle avatar">
                        <div className="w-10 rounded-full bg-primary text-primary-content flex items-center justify-center">
                            <span className="text-sm font-medium">
                                {user?.username?.charAt(0).toUpperCase()}
                            </span>
                        </div>
                    </div>
                    <ul tabIndex={0} className="menu menu-sm dropdown-content bg-base-100 rounded-box z-[1] mt-3 w-52 p-2 shadow">
                        <li>
                            <div className="justify-between">
                                Profile
                                <span className="badge badge-sm">{user?.role}</span>
                            </div>
                        </li>
                        <li><a>Settings</a></li>
                        <li><ThemeSwitcher/></li>
                        <li>
                            <button onClick={handleLogout}>Logout</button>
                        </li>
                    </ul>
                </div>

            </div>
        </div>
    );
};

export default Header;
24  src/frontend/src/components/common/Layout.jsx  Normal file
@@ -0,0 +1,24 @@
import Header from './Header';
import {Outlet} from 'react-router-dom';
import Menu from "./Menu.jsx";
import styles from './Layout.module.css';

const Layout = () => {
    return (
        <div className={styles.layoutContainer}>
            <Header/>
            <div className="flex flex-1 overflow-hidden">
                <aside className={styles.sidebar}>
                    <Menu/>
                </aside>
                <main className={styles.mainContent}>
                    <div className={styles.mainContentInner}>
                        <Outlet/>
                    </div>
                </main>
            </div>
        </div>
    );
};

export default Layout;
36  src/frontend/src/components/common/Layout.module.css  Normal file
@@ -0,0 +1,36 @@
/* Layout Container */
.layoutContainer {
    height: 100vh;
    display: flex;
    flex-direction: column;
    background-color: var(--color-base-200);
}


/* Sidebar */
.sidebar {
    width: 16rem; /* 4rem = 64px, so 16rem = 256px */
    background-color: var(--color-base-100);
    box-shadow: 0 10px 15px -3px rgb(0 0 0 / 0.1), 0 4px 6px -4px rgb(0 0 0 / 0.1);
    overflow-y: auto;
}

/* Main Content Area */
.mainContent {
    flex: 1;
    display: flex;
    flex-direction: column;
    min-height: 0; /* Important for flex to work properly with scrolling */
}

/* Main Content Inner Container */
.mainContentInner {
    max-width: 80rem; /* container max-width */
    margin-left: auto;
    margin-right: auto;
    padding: 0.5rem 1rem;
    flex: 1;
    display: flex;
    flex-direction: column;
    min-height: 0; /* Important for flex to work properly with scrolling */
}
18  src/frontend/src/components/common/Menu.jsx  Normal file
@@ -0,0 +1,18 @@
import {FaBuffer, FaPlus} from "react-icons/fa6";
import { Link } from "react-router-dom";

const Menu = () => {
    return (
        <div className="p-4">
            <ul className="menu">
                <li className="menu-title">Exploration</li>
                <li><Link to="/dashboard"><FaBuffer/>Dashboard</Link></li>
                <li><Link to="/documents"><FaBuffer/>To Review</Link></li>
                <li className="menu-title mt-4">Categories</li>
                <li><a><FaPlus/>Item</a></li>
            </ul>
        </div>
    )
}

export default Menu;
69  src/frontend/src/components/common/ProtectedRoute.jsx  Normal file
@@ -0,0 +1,69 @@
import React from 'react';
import {Navigate, useLocation} from 'react-router-dom';
import {useAuth} from '../../contexts/AuthContext';

/**
 * ProtectedRoute component to guard routes that require authentication
 * Redirects to login if user is not authenticated, preserving intended destination
 *
 * @param {Object} props - Component props
 * @param {React.ReactNode} props.children - Child components to render if authenticated
 * @param {string[]} props.allowedRoles - Array of roles allowed to access this route (optional)
 */
function ProtectedRoute({children, allowedRoles = []}) {
    const {isAuthenticated, loading, user} = useAuth();
    const location = useLocation();

    // Show loading spinner while checking authentication
    if (loading) {
        return (
            <div className="min-h-screen flex items-center justify-center bg-base-200">
                <div className="text-center">
                    <span className="loading loading-spinner loading-lg text-primary"></span>
                    <p className="text-base-content/70 mt-4">Checking authentication...</p>
                </div>
            </div>
        );
    }

    // Redirect to login if not authenticated
    if (!isAuthenticated) {
        return (
            <Navigate
                to="/login"
                state={{from: location}}
                replace
            />
        );
    }

    // Check role-based access if allowedRoles is specified
    if (allowedRoles.length > 0 && user && !allowedRoles.includes(user.role)) {
        return (
            <div className="min-h-screen flex items-center justify-center bg-base-200">
                <div className="card w-full max-w-md shadow-xl bg-base-100">
                    <div className="card-body text-center">
                        <div className="text-6xl mb-4">🚫</div>
                        <h2 className="card-title justify-center text-error">Access Denied</h2>
                        <p className="text-base-content/70 mb-4">
                            You don't have permission to access this page.
                        </p>
                        <div className="card-actions justify-center">
                            <button
                                className="btn btn-primary"
                                onClick={() => window.history.back()}
                            >
                                Go Back
                            </button>
                        </div>
                    </div>
                </div>
            </div>
        );
    }

    // User is authenticated and authorized, render children
    return children;
}

export default ProtectedRoute;
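Wiring the guard into react-router is a matter of wrapping the protected element. A sketch under the assumption that routes live in an App-level <Routes> block; the /dashboard path matches the link in Menu.jsx above, while the role names are assumptions:

import {Routes, Route} from 'react-router-dom';
import ProtectedRoute from './components/common/ProtectedRoute.jsx';
import Layout from './components/common/Layout.jsx';

// Unauthenticated visitors are sent to /login with the original location
// preserved in state; allowedRoles additionally gates on user.role.
const GuardedRoutes = () => (
    <Routes>
        <Route
            path="/dashboard"
            element={
                <ProtectedRoute allowedRoles={['admin', 'user']}>
                    <Layout/>
                </ProtectedRoute>
            }
        />
    </Routes>
);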
29  src/frontend/src/components/common/ThemeSwither.jsx  Normal file
@@ -0,0 +1,29 @@
import {useEffect, useState} from "react";
import {MoonIcon, SunIcon} from "../../assets/icons.jsx";

function ThemeSwitcher() {
    // State to store current theme
    const [theme, setTheme] = useState("light");

    // When theme changes, apply it to <html data-theme="">
    useEffect(() => {
        document.querySelector("html").setAttribute("data-theme", theme);
    }, [theme]);

    // Toggle between light and dark
    const toggleTheme = () => {
        setTheme(theme === "light" ? "dark" : "light");
    };

    return (
        <button
            onClick={toggleTheme}
            className="btn btn-ghost btn-circle"
        >
            {theme === "light" ? MoonIcon : SunIcon}
        </button>
    );
}

export default ThemeSwitcher;
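One limitation worth flagging: the component always starts from "light" and persists nothing, so the chosen theme resets on reload. A possible extension, not part of this change — the 'theme' localStorage key is an assumption — would replace the state and effect inside ThemeSwitcher with:

// Lazy initializer reads the stored choice; the effect writes it back.
const [theme, setTheme] = useState(() => localStorage.getItem('theme') ?? 'light');

useEffect(() => {
    document.documentElement.setAttribute('data-theme', theme);
    localStorage.setItem('theme', theme); // assumes the 'theme' key is free to use
}, [theme]);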
68  src/frontend/src/components/documents/DeleteConfirmModal.jsx  Normal file
@@ -0,0 +1,68 @@
/**
 * DeleteConfirmModal Component
 * Modal dialog to confirm document deletion
 */

import React from 'react';

/**
 * DeleteConfirmModal component
 * @param {Object} props
 * @param {boolean} props.isOpen - Whether the modal is open
 * @param {Object|null} props.document - Document to delete
 * @param {function(): void} props.onClose - Callback when modal is closed
 * @param {function(): void} props.onConfirm - Callback when deletion is confirmed
 * @param {boolean} props.isDeleting - Whether deletion is in progress
 * @returns {JSX.Element}
 */
const DeleteConfirmModal = ({
    isOpen,
    document,
    onClose,
    onConfirm,
    isDeleting = false
}) => {
    if (!isOpen || !document) return null;

    return (
        <dialog className="modal modal-open">
            <div className="modal-box">
                <h3 className="font-bold text-lg">Confirm Deletion</h3>
                <p className="py-4">
                    Are you sure you want to delete <span className="font-semibold">"{document.name}"</span>?
                </p>
                <p className="text-sm text-gray-500">
                    This action cannot be undone.
                </p>
                <div className="modal-action">
                    <button
                        className="btn btn-ghost"
                        onClick={onClose}
                        disabled={isDeleting}
                    >
                        Cancel
                    </button>
                    <button
                        className="btn btn-error"
                        onClick={onConfirm}
                        disabled={isDeleting}
                    >
                        {isDeleting ? (
                            <>
                                <span className="loading loading-spinner loading-sm"></span>
                                Deleting...
                            </>
                        ) : (
                            'Delete'
                        )}
                    </button>
                </div>
            </div>
            <form method="dialog" className="modal-backdrop" onClick={onClose}>
                <button disabled={isDeleting}>close</button>
            </form>
        </dialog>
    );
};

export default DeleteConfirmModal;
285  src/frontend/src/components/documents/DocumentCard.jsx  Normal file
@@ -0,0 +1,285 @@
/**
 * DocumentCard Component
 * Displays a document as a DaisyUI card with thumbnail and metadata
 * Supports different view modes: small, large, and detail
 */

import React, {memo, useState, useEffect} from 'react';
import {API_BASE_URL} from "../../utils/api.js";

/**
 * Formats file size to human-readable format
 * @param {number} bytes - File size in bytes
 * @returns {string} Formatted file size
 */
const formatFileSize = (bytes) => {
    if (bytes === 0) return '0 Bytes';
    const k = 1024;
    const sizes = ['Bytes', 'KB', 'MB', 'GB'];
    const i = Math.floor(Math.log(bytes) / Math.log(k));
    return Math.round((bytes / Math.pow(k, i)) * 100) / 100 + ' ' + sizes[i];
};

/**
 * Formats date to localized string
 * @param {string} dateString - ISO date string
 * @returns {string} Formatted date
 */
const formatDate = (dateString) => {
    return new Date(dateString).toLocaleDateString('en-US', {
        year: 'numeric',
        month: 'short',
        day: 'numeric'
    });
};

/**
 * Builds full URL from relative path
 * @param {string} relativePath - Relative API path
 * @returns {string} Full URL
 */
const buildFullUrl = (relativePath) => {
    if (!relativePath) return '';
    const baseUrl = import.meta.env.VITE_API_BASE_URL || API_BASE_URL;
    return `${baseUrl}${relativePath}`;
};

/**
 * Hook to load protected images with bearer token
 * @param {string} url - Image URL
 * @returns {Object} { imageSrc, loading, error }
 */
const useProtectedImage = (url) => {
    const [imageSrc, setImageSrc] = useState(null);
    const [loading, setLoading] = useState(true);
    const [error, setError] = useState(false);

    useEffect(() => {
        if (!url) {
            setLoading(false);
            return;
        }

        let objectUrl;

        const fetchImage = async () => {
            try {
                const token = localStorage.getItem('access_token');
                const fullUrl = buildFullUrl(url);

                const response = await fetch(fullUrl, {
                    headers: {
                        'Authorization': `Bearer ${token}`
                    }
                });

                if (!response.ok) {
                    throw new Error('Failed to load image');
                }

                const blob = await response.blob();
                objectUrl = URL.createObjectURL(blob);
                setImageSrc(objectUrl);
                setLoading(false);
            } catch (err) {
                console.error('Error loading thumbnail:', err);
                setError(true);
                setLoading(false);
            }
        };

        fetchImage();

        // Cleanup: revoke object URL on unmount
        return () => {
            if (objectUrl) {
                URL.revokeObjectURL(objectUrl);
            }
        };
    }, [url]);

    return { imageSrc, loading, error };
};

/**
 * DocumentCard component
 * @param {Object} props
 * @param {Object} props.document - Document object
 * @param {'small'|'large'|'detail'} props.viewMode - Current view mode
 * @param {function(): void} props.onEdit - Callback when edit is clicked
 * @param {function(): void} props.onDelete - Callback when delete is clicked
 * @returns {JSX.Element}
 */
const DocumentCard = memo(({document, viewMode, onEdit, onDelete}) => {
    const {name, originalFileType, thumbnailUrl, pageCount, fileSize, createdAt, tags, categories} = document;

    // Load protected image
    const { imageSrc, loading, error } = useProtectedImage(thumbnailUrl);

    // Determine card classes based on view mode
    const getCardClasses = () => {
        const baseClasses = 'card bg-base-100 shadow-xl hover:shadow-2xl transition-shadow group relative';

        switch (viewMode) {
            case 'small':
                return `${baseClasses} w-full`;
            case 'large':
                return `${baseClasses} w-full`;
            case 'detail':
                return `${baseClasses} w-full`;
            default:
                return baseClasses;
        }
    };

    // Render thumbnail with hover actions
    const renderThumbnail = () => {
        const heightClass = viewMode === 'small' ? 'h-48' : viewMode === 'large' ? 'h-64' : 'h-64';

        return (
            <figure className="relative overflow-hidden">
                {loading ? (
                    <div className={`w-[200px] ${heightClass} bg-gray-200 animate-pulse flex items-center justify-center`}>
                        <svg className="w-8 h-8 text-gray-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                            <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M4 16l4.586-4.586a2 2 0 012.828 0L16 16m-2-2l1.586-1.586a2 2 0 012.828 0L20 14m-6-6h.01M6 20h12a2 2 0 002-2V6a2 2 0 00-2-2H6a2 2 0 00-2 2v12a2 2 0 002 2z" />
                        </svg>
                    </div>
                ) : error ? (
                    <div className={`w-[200px] ${heightClass} bg-gray-300 flex flex-col items-center justify-center`}>
                        <svg className="w-8 h-8 text-gray-500 mb-2" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                            <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M12 8v4m0 4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
                        </svg>
                        <span className="text-gray-500 text-xs">Failed to load</span>
                    </div>
                ) : (
                    <img
                        src={imageSrc}
                        alt={`${name}`}
                        className={`object-cover ${heightClass}`}
                    />
                )}

                {/* Hover overlay with actions */}
                <div className="absolute top-2 right-2 flex gap-2 opacity-0 group-hover:opacity-100 transition-opacity">
                    <button
                        className="btn btn-sm btn-circle btn-primary"
                        onClick={onEdit}
                        aria-label="Edit document"
                        title="Edit"
                    >
                        <svg xmlns="http://www.w3.org/2000/svg" className="h-4 w-4" fill="none" viewBox="0 0 24 24"
                             stroke="currentColor">
                            <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2}
                                  d="M11 5H6a2 2 0 00-2 2v11a2 2 0 002 2h11a2 2 0 002-2v-5m-1.414-9.414a2 2 0 112.828 2.828L11.828 15H9v-2.828l8.586-8.586z"/>
                        </svg>
                    </button>
                    <button
                        className="btn btn-sm btn-circle btn-error"
                        onClick={onDelete}
                        aria-label="Delete document"
                        title="Delete"
                    >
                        <svg xmlns="http://www.w3.org/2000/svg" className="h-4 w-4" fill="none" viewBox="0 0 24 24"
                             stroke="currentColor">
                            <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2}
                                  d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16"/>
                        </svg>
                    </button>
                </div>

                {/* File type badge */}
                <div className="absolute bottom-2 left-2">
                    <span className="badge badge-accent badge-sm">{originalFileType}</span>
                </div>
            </figure>
        );
    };

    // Render card body based on view mode
    const renderCardBody = () => {
        if (viewMode === 'small') {
            return (
                <div className="card-body p-3">
                    <h3 className="card-title text-sm truncate" title={name}>{name}</h3>
                    <p className="text-xs text-gray-500">{pageCount} page{pageCount > 1 ? 's' : ''}</p>
                </div>
            );
        }

        if (viewMode === 'large') {
            return (
                <div className="card-body p-4">
                    <h3 className="card-title text-base truncate" title={name}>{name}</h3>
                    <div className="flex flex-wrap gap-1 mb-2">
                        {tags.slice(0, 3).map(tag => (
                            <span key={tag} className="badge badge-primary badge-xs">{tag}</span>
                        ))}
                        {tags.length > 3 && (
                            <span className="badge badge-ghost badge-xs">+{tags.length - 3}</span>
                        )}
                    </div>
                    <div className="text-sm space-y-1">
                        <p className="text-gray-500">{pageCount} page{pageCount > 1 ? 's' : ''}</p>
                        <p className="text-gray-500">{formatFileSize(fileSize)}</p>
                    </div>
                </div>
            );
        }

        // Detail mode
        return (
            <div className="card-body">
                <h3 className="card-title text-lg" title={name}>{name}</h3>

                {/* Tags */}
                {tags.length > 0 && (
                    <div className="flex flex-wrap gap-1 mb-2">
                        {tags.map(tag => (
                            <span key={tag} className="badge badge-primary badge-sm">{tag}</span>
                        ))}
                    </div>
                )}

                {/* Categories */}
                {categories.length > 0 && (
                    <div className="flex flex-wrap gap-1 mb-3">
                        {categories.map(category => (
                            <span key={category} className="badge badge-secondary badge-sm">{category}</span>
                        ))}
                    </div>
                )}

                {/* Metadata */}
                <div className="grid grid-cols-2 gap-2 text-sm">
                    <div>
                        <span className="font-semibold">Pages:</span>
                        <span className="ml-2 text-gray-500">{pageCount}</span>
                    </div>
                    <div>
                        <span className="font-semibold">Size:</span>
                        <span className="ml-2 text-gray-500">{formatFileSize(fileSize)}</span>
                    </div>
                    <div>
                        <span className="font-semibold">Type:</span>
                        <span className="ml-2 text-gray-500">{originalFileType}</span>
                    </div>
                    <div>
                        <span className="font-semibold">Date:</span>
                        <span className="ml-2 text-gray-500">{formatDate(createdAt)}</span>
                    </div>
                </div>
            </div>
        );
    };

    return (
        <div className={getCardClasses()}>
            {renderThumbnail()}
            {renderCardBody()}
        </div>
    );
});

DocumentCard.displayName = 'DocumentCard';

export default DocumentCard;
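The useProtectedImage hook is the notable piece of this file: because thumbnails sit behind a bearer-token API, a plain <img src> would be rejected, so the hook fetches the bytes itself with the Authorization header, exposes them through URL.createObjectURL, and revokes the object URL on unmount so blobs are not leaked. The hook is currently module-private; if it were lifted into a shared module, standalone usage would look like this sketch (ProtectedImg is an illustrative name):

// Hypothetical standalone consumer of useProtectedImage:
const ProtectedImg = ({url, alt}) => {
    const {imageSrc, loading, error} = useProtectedImage(url);
    if (loading) return <span className="loading loading-spinner"></span>;
    if (error) return <span>Failed to load</span>;
    return <img src={imageSrc} alt={alt}/>;
};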
164  src/frontend/src/components/documents/DocumentDetailView.jsx  Normal file
@@ -0,0 +1,164 @@
/**
 * DocumentDetailView Component
 * Displays a document in detail mode with all pages visible
 * This is a placeholder that shows multiple page thumbnails
 * When the real PDF backend is ready, this can be replaced with actual PDF rendering
 */

import React from 'react';

/**
 * Formats file size to human-readable format
 * @param {number} bytes - File size in bytes
 * @returns {string} Formatted file size
 */
const formatFileSize = (bytes) => {
    if (bytes === 0) return '0 Bytes';
    const k = 1024;
    const sizes = ['Bytes', 'KB', 'MB', 'GB'];
    const i = Math.floor(Math.log(bytes) / Math.log(k));
    return Math.round((bytes / Math.pow(k, i)) * 100) / 100 + ' ' + sizes[i];
};

/**
 * Formats date to localized string
 * @param {string} dateString - ISO date string
 * @returns {string} Formatted date
 */
const formatDate = (dateString) => {
    return new Date(dateString).toLocaleDateString('en-US', {
        year: 'numeric',
        month: 'long',
        day: 'numeric',
        hour: '2-digit',
        minute: '2-digit'
    });
};

/**
 * DocumentDetailView component
 * @param {Object} props
 * @param {Object} props.document - Document object
 * @param {function(): void} props.onEdit - Callback when edit is clicked
 * @param {function(): void} props.onDelete - Callback when delete is clicked
 * @returns {JSX.Element}
 */
const DocumentDetailView = ({ document, onEdit, onDelete }) => {
    const {
        name,
        originalFileType,
        thumbnailUrl,
        pageCount,
        fileSize,
        createdAt,
        tags,
        categories
    } = document;

    // Generate placeholder pages (in real implementation, these would be actual PDF pages)
    const pages = Array.from({ length: pageCount }, (_, i) => ({
        pageNumber: i + 1,
        thumbnailUrl: thumbnailUrl.replace('Page+1', `Page+${i + 1}`)
    }));

    return (
        <div className="card bg-base-100 shadow-xl">
            {/* Header with actions */}
            <div className="card-body">
                <div className="flex justify-between items-start mb-4">
                    <div className="flex-1">
                        <h2 className="card-title text-2xl mb-2">{name}</h2>

                        {/* Tags */}
                        {tags.length > 0 && (
                            <div className="flex flex-wrap gap-2 mb-2">
                                <span className="text-sm font-semibold text-gray-600">Tags:</span>
                                {tags.map(tag => (
                                    <span key={tag} className="badge badge-primary">{tag}</span>
                                ))}
                            </div>
                        )}

                        {/* Categories */}
                        {categories.length > 0 && (
                            <div className="flex flex-wrap gap-2 mb-3">
                                <span className="text-sm font-semibold text-gray-600">Categories:</span>
                                {categories.map(category => (
                                    <span key={category} className="badge badge-secondary">{category}</span>
                                ))}
                            </div>
                        )}
                    </div>

                    {/* Action buttons */}
                    <div className="flex gap-2">
                        <button
                            className="btn btn-primary btn-sm"
                            onClick={onEdit}
                            aria-label="Edit document"
                        >
                            <svg xmlns="http://www.w3.org/2000/svg" className="h-4 w-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
                                <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M11 5H6a2 2 0 00-2 2v11a2 2 0 002 2h11a2 2 0 002-2v-5m-1.414-9.414a2 2 0 112.828 2.828L11.828 15H9v-2.828l8.586-8.586z" />
                            </svg>
                            Edit
                        </button>
                        <button
                            className="btn btn-error btn-sm"
                            onClick={onDelete}
                            aria-label="Delete document"
                        >
                            <svg xmlns="http://www.w3.org/2000/svg" className="h-4 w-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
                                <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16" />
                            </svg>
                            Delete
                        </button>
                    </div>
                </div>

                {/* Metadata grid */}
                <div className="grid grid-cols-2 md:grid-cols-4 gap-4 mb-6 p-4 bg-base-200 rounded-lg">
                    <div>
                        <span className="text-sm font-semibold text-gray-600">Original Type</span>
                        <p className="text-lg">{originalFileType}</p>
                    </div>
                    <div>
                        <span className="text-sm font-semibold text-gray-600">Pages</span>
                        <p className="text-lg">{pageCount}</p>
                    </div>
                    <div>
                        <span className="text-sm font-semibold text-gray-600">File Size</span>
                        <p className="text-lg">{formatFileSize(fileSize)}</p>
                    </div>
                    <div>
                        <span className="text-sm font-semibold text-gray-600">Created</span>
                        <p className="text-lg">{formatDate(createdAt)}</p>
                    </div>
                </div>

                {/* Pages preview */}
                <div>
                    <h3 className="text-lg font-semibold mb-4">Document Pages ({pageCount})</h3>
                    <div className="grid grid-cols-2 md:grid-cols-3 lg:grid-cols-4 gap-4">
                        {pages.map((page) => (
                            <div key={page.pageNumber} className="relative group">
                                <div className="aspect-[3/4] bg-base-200 rounded-lg overflow-hidden shadow-md hover:shadow-xl transition-shadow">
                                    <img
                                        src={page.thumbnailUrl}
                                        alt={`Page ${page.pageNumber}`}
                                        className="w-full h-full object-cover"
                                        loading="lazy"
                                    />
                                </div>
                                <div className="text-center mt-2">
                                    <span className="text-sm text-gray-600">Page {page.pageNumber}</span>
                                </div>
                            </div>
                        ))}
                    </div>
                </div>
            </div>
        </div>
    );
};

export default DocumentDetailView;
181  src/frontend/src/components/documents/DocumentGallery.jsx  Normal file
@@ -0,0 +1,181 @@
/**
 * DocumentGallery Component
 * Main container for displaying documents in different view modes
 */

import React, { useState } from 'react';
import DocumentCard from './DocumentCard';
import DocumentDetailView from './DocumentDetailView';
import ViewModeSwitcher from './ViewModeSwitcher';
import EditDocumentModal from './EditDocumentModal';
import DeleteConfirmModal from './DeleteConfirmModal';
import { useDocuments } from '../../hooks/useDocuments';

/**
 * DocumentGallery component
 * @returns {JSX.Element}
 */
const DocumentGallery = () => {
    const { documents, loading, error, updateDocument, deleteDocument } = useDocuments();
    const [viewMode, setViewMode] = useState('large');
    const [editingDocument, setEditingDocument] = useState(null);
    const [deletingDocument, setDeletingDocument] = useState(null);
    const [isSaving, setIsSaving] = useState(false);
    const [isDeleting, setIsDeleting] = useState(false);

    /**
     * Handles opening the edit modal
     * @param {Object} document - Document to edit
     */
    const handleEditClick = (document) => {
        setEditingDocument(document);
    };

    /**
     * Handles opening the delete confirmation modal
     * @param {Object} document - Document to delete
     */
    const handleDeleteClick = (document) => {
        setDeletingDocument(document);
    };

    /**
     * Handles saving document changes
     * @param {Object} updates - Updates object with tags and categories
     */
    const handleSaveEdit = async (updates) => {
        if (!editingDocument) return;

        setIsSaving(true);
        const success = await updateDocument(editingDocument.id, updates);
        setIsSaving(false);

        if (success) {
            setEditingDocument(null);
        }
    };

    /**
     * Handles confirming document deletion
     */
    const handleConfirmDelete = async () => {
        if (!deletingDocument) return;

        setIsDeleting(true);
        const success = await deleteDocument(deletingDocument.id);
        setIsDeleting(false);

        if (success) {
            setDeletingDocument(null);
        }
    };

    /**
     * Gets grid classes based on view mode
     * @returns {string} Tailwind CSS classes
     */
    const getGridClasses = () => {
        switch (viewMode) {
            case 'small':
                return 'grid grid-cols-2 sm:grid-cols-3 md:grid-cols-4 lg:grid-cols-5 xl:grid-cols-6 gap-4';
            case 'large':
                return 'grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-3 gap-6';
            case 'detail':
                return 'flex flex-col gap-6';
            default:
                return 'grid grid-cols-1 gap-4';
        }
    };

    // Loading state
    if (loading) {
        return (
            <div className="flex justify-center items-center min-h-[400px]">
                <span className="loading loading-spinner loading-lg"></span>
            </div>
        );
    }

    // Error state
    if (error) {
        return (
            <div className="alert alert-error">
                <svg xmlns="http://www.w3.org/2000/svg" className="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24">
                    <path strokeLinecap="round" strokeLinejoin="round" strokeWidth="2" d="M10 14l2-2m0 0l2-2m-2 2l-2-2m2 2l2 2m7-2a9 9 0 11-18 0 9 9 0 0118 0z" />
                </svg>
                <span>Error loading documents: {error}</span>
            </div>
        );
    }

    // Empty state
    if (documents.length === 0) {
        return (
            <div className="flex flex-col items-center justify-center min-h-[400px] text-center">
                <svg xmlns="http://www.w3.org/2000/svg" className="h-24 w-24 text-gray-300 mb-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
                    <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={1.5} d="M9 12h6m-6 4h6m2 5H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z" />
                </svg>
                <h3 className="text-xl font-semibold mb-2">No documents yet</h3>
                <p className="text-gray-500">Upload your first document to get started</p>
            </div>
        );
    }

    return (
        <div className="h-full flex flex-col">
            {/* Header with view mode switcher - Always visible */}
            <div className="flex justify-between items-center mb-6 flex-shrink-0">
                <div>
                    <p className="text-gray-500">{documents.length} document{documents.length !== 1 ? 's' : ''}</p>
                </div>
                <ViewModeSwitcher
                    currentMode={viewMode}
                    onModeChange={setViewMode}
                />
            </div>

            {/* Document grid/list - Scrollable */}
            <div className="flex-1 overflow-y-auto">
                <div className={getGridClasses()}>
                    {documents.map(document => (
                        viewMode === 'detail' ? (
                            <DocumentDetailView
                                key={document.id}
                                document={document}
                                onEdit={() => handleEditClick(document)}
                                onDelete={() => handleDeleteClick(document)}
                            />
                        ) : (
                            <DocumentCard
                                key={document.id}
                                document={document}
                                viewMode={viewMode}
                                onEdit={() => handleEditClick(document)}
                                onDelete={() => handleDeleteClick(document)}
                            />
                        )
                    ))}
                </div>
            </div>

            {/* Modals */}
            <EditDocumentModal
                isOpen={!!editingDocument}
                document={editingDocument}
                onClose={() => setEditingDocument(null)}
                onSave={handleSaveEdit}
                isSaving={isSaving}
            />

            <DeleteConfirmModal
                isOpen={!!deletingDocument}
                document={deletingDocument}
                onClose={() => setDeletingDocument(null)}
                onConfirm={handleConfirmDelete}
                isDeleting={isDeleting}
            />
        </div>
    );
};

export default DocumentGallery;
225  src/frontend/src/components/documents/EditDocumentModal.jsx  Normal file
@@ -0,0 +1,225 @@
/**
 * EditDocumentModal Component
 * Modal dialog for editing document tags and categories
 */

import React, { useState, useEffect } from 'react';
import { getAvailableTags, getAvailableCategories } from '../../services/documentService';

/**
 * EditDocumentModal component
 * @param {Object} props
 * @param {boolean} props.isOpen - Whether the modal is open
 * @param {Object|null} props.document - Document to edit
 * @param {function(): void} props.onClose - Callback when modal is closed
 * @param {function(Object): void} props.onSave - Callback when changes are saved
 * @param {boolean} props.isSaving - Whether save is in progress
 * @returns {JSX.Element}
 */
const EditDocumentModal = ({
    isOpen,
    document,
    onClose,
    onSave,
    isSaving = false
}) => {
    const [selectedTags, setSelectedTags] = useState([]);
    const [selectedCategories, setSelectedCategories] = useState([]);
    const [availableTags, setAvailableTags] = useState([]);
    const [availableCategories, setAvailableCategories] = useState([]);
    const [newTag, setNewTag] = useState('');
    const [newCategory, setNewCategory] = useState('');

    // Load available tags and categories
    useEffect(() => {
        const loadOptions = async () => {
            const [tags, categories] = await Promise.all([
                getAvailableTags(),
                getAvailableCategories()
            ]);
            setAvailableTags(tags);
            setAvailableCategories(categories);
        };
        loadOptions();
    }, []);

    // Initialize selected values when document changes
    useEffect(() => {
        if (document) {
            setSelectedTags(document.tags || []);
            setSelectedCategories(document.categories || []);
        }
    }, [document]);

    const handleAddTag = (tag) => {
        if (tag && !selectedTags.includes(tag)) {
            setSelectedTags([...selectedTags, tag]);
        }
        setNewTag('');
    };

    const handleRemoveTag = (tag) => {
        setSelectedTags(selectedTags.filter(t => t !== tag));
    };

    const handleAddCategory = (category) => {
        if (category && !selectedCategories.includes(category)) {
            setSelectedCategories([...selectedCategories, category]);
        }
        setNewCategory('');
    };

    const handleRemoveCategory = (category) => {
        setSelectedCategories(selectedCategories.filter(c => c !== category));
    };

    const handleSave = () => {
        onSave({
            tags: selectedTags,
            categories: selectedCategories
        });
    };

    if (!isOpen || !document) return null;

    return (
        <dialog className="modal modal-open">
            <div className="modal-box max-w-2xl">
                <h3 className="font-bold text-lg mb-4">Edit Document</h3>

                <div className="mb-4">
                    <p className="text-sm text-gray-500">
                        Document: <span className="font-semibold">{document.name}</span>
                    </p>
                </div>

                {/* Tags Section */}
                <div className="mb-6">
                    <label className="label">
                        <span className="label-text font-semibold">Tags</span>
                    </label>

                    {/* Selected Tags */}
                    <div className="flex flex-wrap gap-2 mb-3">
                        {selectedTags.map(tag => (
                            <div key={tag} className="badge badge-primary gap-2">
                                {tag}
                                <button
                                    type="button"
                                    className="btn btn-ghost btn-xs"
                                    onClick={() => handleRemoveTag(tag)}
                                    disabled={isSaving}
                                >
                                    ✕
                                </button>
                            </div>
                        ))}
                    </div>

                    {/* Add Tag */}
                    <div className="flex gap-2">
                        <select
                            className="select select-bordered flex-1"
                            value={newTag}
                            onChange={(e) => setNewTag(e.target.value)}
                            disabled={isSaving}
                        >
                            <option value="">Select a tag...</option>
                            {availableTags
                                .filter(tag => !selectedTags.includes(tag))
                                .map(tag => (
                                    <option key={tag} value={tag}>{tag}</option>
                                ))
                            }
                        </select>
                        <button
                            className="btn btn-primary"
                            onClick={() => handleAddTag(newTag)}
                            disabled={!newTag || isSaving}
                        >
                            Add
                        </button>
                    </div>
                </div>

                {/* Categories Section */}
                <div className="mb-6">
                    <label className="label">
                        <span className="label-text font-semibold">Categories</span>
                    </label>

                    {/* Selected Categories */}
                    <div className="flex flex-wrap gap-2 mb-3">
                        {selectedCategories.map(category => (
                            <div key={category} className="badge badge-secondary gap-2">
                                {category}
                                <button
                                    type="button"
                                    className="btn btn-ghost btn-xs"
                                    onClick={() => handleRemoveCategory(category)}
                                    disabled={isSaving}
                                >
                                    ✕
                                </button>
                            </div>
                        ))}
                    </div>

                    {/* Add Category */}
                    <div className="flex gap-2">
                        <select
                            className="select select-bordered flex-1"
                            value={newCategory}
                            onChange={(e) => setNewCategory(e.target.value)}
                            disabled={isSaving}
                        >
                            <option value="">Select a category...</option>
                            {availableCategories
                                .filter(cat => !selectedCategories.includes(cat))
                                .map(cat => (
                                    <option key={cat} value={cat}>{cat}</option>
                                ))
                            }
                        </select>
                        <button
                            className="btn btn-secondary"
                            onClick={() => handleAddCategory(newCategory)}
                            disabled={!newCategory || isSaving}
                        >
                            Add
                        </button>
                    </div>
                </div>

                <div className="modal-action">
                    <button
                        className="btn btn-ghost"
                        onClick={onClose}
                        disabled={isSaving}
                    >
                        Cancel
                    </button>
                    <button
                        className="btn btn-primary"
                        onClick={handleSave}
                        disabled={isSaving}
                    >
                        {isSaving ? (
                            <>
                                <span className="loading loading-spinner loading-sm"></span>
                                Saving...
                            </>
                        ) : (
                            'Save Changes'
                        )}
                    </button>
                </div>
            </div>
            <form method="dialog" className="modal-backdrop" onClick={onClose}>
                <button disabled={isSaving}>close</button>
            </form>
        </dialog>
    );
};

export default EditDocumentModal;
51  src/frontend/src/components/documents/ViewModeSwitcher.jsx  Normal file
@@ -0,0 +1,51 @@
/**
 * ViewModeSwitcher Component
 * Allows users to switch between different view modes (small, large, detail)
 */

import React from 'react';
import {FaList} from "react-icons/fa6";
import {FaTh, FaThLarge} from "react-icons/fa";

/**
 * @typedef {'small' | 'large' | 'detail'} ViewMode
 */

/**
 * ViewModeSwitcher component
 * @param {Object} props
 * @param {ViewMode} props.currentMode - Current active view mode
 * @param {function(ViewMode): void} props.onModeChange - Callback when mode changes
 * @returns {JSX.Element}
 */
const ViewModeSwitcher = ({ currentMode, onModeChange }) => {
    const modes = [
        { id: 'small', label: 'Small', icon: FaTh },
        { id: 'large', label: 'Large', icon: FaThLarge },
        { id: 'detail', label: 'Detail', icon: FaList }
    ];

    return (
        <div className="flex gap-2">
            {modes.map(mode => {
                const IconComponent = mode.icon;
                return (
                    <button
                        key={mode.id}
                        onClick={() => onModeChange(mode.id)}
                        className={`btn btn-sm ${
                            currentMode === mode.id ? 'btn-primary' : 'btn-ghost'
                        }`}
                        aria-label={`Switch to ${mode.label} view`}
                        title={`${mode.label} view`}
                    >
                        <IconComponent />
                        <span className="hidden sm:inline ml-1">{mode.label}</span>
                    </button>
                );
            })}
        </div>
    );
};

export default ViewModeSwitcher;
205  src/frontend/src/contexts/AuthContext.jsx  Normal file
@@ -0,0 +1,205 @@
import React, {createContext, useContext, useEffect, useReducer} from 'react';
import authService from '../services/authService';

// Auth state actions
const AUTH_ACTIONS = {
    LOGIN_START: 'LOGIN_START',
    LOGIN_SUCCESS: 'LOGIN_SUCCESS',
    LOGIN_FAILURE: 'LOGIN_FAILURE',
    LOGOUT: 'LOGOUT',
    LOAD_USER: 'LOAD_USER',
    CLEAR_ERROR: 'CLEAR_ERROR',
};

// Initial state
const initialState = {
    user: null,
    token: null,
    isAuthenticated: false,
    loading: true, // Loading true initially to check stored auth
    error: null,
};

// Auth reducer to manage state transitions
function authReducer(state, action) {
    switch (action.type) {
        case AUTH_ACTIONS.LOGIN_START:
            return {
                ...state,
                loading: true,
                error: null,
            };

        case AUTH_ACTIONS.LOGIN_SUCCESS:
            return {
                ...state,
                user: action.payload.user,
                token: action.payload.token,
                isAuthenticated: true,
                loading: false,
                error: null,
            };

        case AUTH_ACTIONS.LOGIN_FAILURE:
            return {
                ...state,
                user: null,
                token: null,
                isAuthenticated: false,
                loading: false,
                error: action.payload.error,
            };

        case AUTH_ACTIONS.LOGOUT:
            return {
                ...state,
                user: null,
                token: null,
                isAuthenticated: false,
                loading: false,
                error: null,
            };

        case AUTH_ACTIONS.LOAD_USER:
            return {
                ...state,
                user: action.payload.user,
                token: action.payload.token,
                isAuthenticated: !!action.payload.token,
                loading: false,
                error: null,
            };

        case AUTH_ACTIONS.CLEAR_ERROR:
            return {
                ...state,
                error: null,
            };

        default:
            return state;
    }
}

// Create context
const AuthContext = createContext(null);

/**
 * AuthProvider component to wrap the app and provide authentication state
 * @param {Object} props - Component props
 * @param {React.ReactNode} props.children - Child components
 */
export function AuthProvider({children}) {
    const [state, dispatch] = useReducer(authReducer, initialState);

    // Load stored authentication data on app startup
    useEffect(() => {
        const loadStoredAuth = () => {
            const token = authService.getStoredToken();
            const user = authService.getStoredUser();

            dispatch({
                type: AUTH_ACTIONS.LOAD_USER,
                payload: {user, token},
            });
        };

        loadStoredAuth();
    }, []);

    /**
     * Login function to authenticate user
     * @param {string} username - User's username
     * @param {string} password - User's password
     * @returns {Promise<boolean>} True if login successful
     */
    const login = async (username, password) => {
        try {
            dispatch({type: AUTH_ACTIONS.LOGIN_START});

            const {access_token, user} = await authService.login(username, password);

            dispatch({
                type: AUTH_ACTIONS.LOGIN_SUCCESS,
                payload: {user, token: access_token},
            });

            return true;
        } catch (error) {
            dispatch({
                type: AUTH_ACTIONS.LOGIN_FAILURE,
                payload: {error: error.message},
            });
            return false;
        }
    };

    /**
     * Logout function to clear authentication state
     */
    const logout = () => {
        authService.logout();
        dispatch({type: AUTH_ACTIONS.LOGOUT});
    };

    /**
     * Clear error message from state
     */
    const clearError = () => {
        dispatch({type: AUTH_ACTIONS.CLEAR_ERROR});
    };

    /**
     * Refresh user data from API
     */
    const refreshUser = async () => {
        try {
            const user = await authService.getCurrentUser();
            dispatch({
                type: AUTH_ACTIONS.LOGIN_SUCCESS,
                payload: {user, token: state.token},
            });
        } catch (error) {
            console.error('Failed to refresh user data:', error);
            // Don't logout on refresh failure, just log the error
        }
    };

    // Context value object
    const value = {
        // State
        user: state.user,
        token: state.token,
        isAuthenticated: state.isAuthenticated,
        loading: state.loading,
        error: state.error,

        // Actions
        login,
        logout,
        clearError,
        refreshUser,
    };

    return (
        <AuthContext.Provider value={value}>
            {children}
        </AuthContext.Provider>
    );
}

/**
 * Custom hook to use authentication context
 * @returns {Object} Auth context value
 * @throws {Error} If used outside AuthProvider
 */
export function useAuth() {
    const context = useContext(AuthContext);

    if (!context) {
        throw new Error('useAuth must be used within an AuthProvider');
    }

    return context;
}
export { AuthContext };
12 src/frontend/src/hooks/useAuth.js Normal file
@@ -0,0 +1,12 @@
import {useContext} from 'react';
import {AuthContext} from '../contexts/AuthContext';

export const useAuth = () => {
  const context = useContext(AuthContext);

  if (!context) {
    throw new Error('useAuth must be used within an AuthProvider');
  }

  return context;
};
85 src/frontend/src/hooks/useDocuments.js Normal file
@@ -0,0 +1,85 @@
/**
 * Custom hook for managing documents
 * Handles fetching, updating, and deleting documents
 */

import { useState, useEffect, useCallback } from 'react';
import * as documentService from '../services/documentService';

/**
 * Hook for managing documents state and operations
 * @returns {Object} Documents state and operations
 */
export const useDocuments = () => {
  const [documents, setDocuments] = useState([]);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState(null);

  /**
   * Fetches all documents from the service
   */
  const fetchDocuments = useCallback(async () => {
    try {
      setLoading(true);
      setError(null);
      const data = await documentService.getAllDocuments();
      setDocuments(data);
    } catch (err) {
      setError(err.message);
      console.error('Error fetching documents:', err);
    } finally {
      setLoading(false);
    }
  }, []);

  /**
   * Updates a document's tags and categories
   * @param {string} id - Document ID
   * @param {Object} updates - Updates object
   * @returns {Promise<boolean>} Success status
   */
  const updateDocument = useCallback(async (id, updates) => {
    try {
      const updatedDoc = await documentService.updateDocument(id, updates);
      setDocuments(prevDocs =>
        prevDocs.map(doc => (doc.id === id ? updatedDoc : doc))
      );
      return true;
    } catch (err) {
      setError(err.message);
      console.error('Error updating document:', err);
      return false;
    }
  }, []);

  /**
   * Deletes a document
   * @param {string} id - Document ID
   * @returns {Promise<boolean>} Success status
   */
  const deleteDocument = useCallback(async (id) => {
    try {
      await documentService.deleteDocument(id);
      setDocuments(prevDocs => prevDocs.filter(doc => doc.id !== id));
      return true;
    } catch (err) {
      setError(err.message);
      console.error('Error deleting document:', err);
      return false;
    }
  }, []);

  // Fetch documents on mount
  useEffect(() => {
    fetchDocuments();
  }, [fetchDocuments]);

  return {
    documents,
    loading,
    error,
    fetchDocuments,
    updateDocument,
    deleteDocument
  };
};
@@ -1,68 +1,11 @@
-:root {
-  font-family: system-ui, Avenir, Helvetica, Arial, sans-serif;
-  line-height: 1.5;
-  font-weight: 400;
-
-  color-scheme: light dark;
-  color: rgba(255, 255, 255, 0.87);
-  background-color: #242424;
-
-  font-synthesis: none;
-  text-rendering: optimizeLegibility;
-  -webkit-font-smoothing: antialiased;
-  -moz-osx-font-smoothing: grayscale;
-}
-
-a {
-  font-weight: 500;
-  color: #646cff;
-  text-decoration: inherit;
-}
-a:hover {
-  color: #535bf2;
-}
-
-body {
-  margin: 0;
-  display: flex;
-  place-items: center;
-  min-width: 320px;
-  min-height: 100vh;
-}
-
-h1 {
-  font-size: 3.2em;
-  line-height: 1.1;
-}
-
-button {
-  border-radius: 8px;
-  border: 1px solid transparent;
-  padding: 0.6em 1.2em;
-  font-size: 1em;
-  font-weight: 500;
-  font-family: inherit;
-  background-color: #1a1a1a;
-  cursor: pointer;
-  transition: border-color 0.25s;
-}
-button:hover {
-  border-color: #646cff;
-}
-button:focus,
-button:focus-visible {
-  outline: 4px auto -webkit-focus-ring-color;
-}
-
-@media (prefers-color-scheme: light) {
-  :root {
-    color: #213547;
-    background-color: #ffffff;
-  }
-  a:hover {
-    color: #747bff;
-  }
-  button {
-    background-color: #f9f9f9;
-  }
-}
+@tailwind base;
+@tailwind components;
+@tailwind utilities;
+@plugin "daisyui";
+
+/* Custom styles for the application */
+body {
+  font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
+  margin: 0;
+}
@@ -1,6 +1,7 @@
 import { StrictMode } from 'react'
 import { createRoot } from 'react-dom/client'
 import './index.css'
+import './App.css'
 import App from './App.jsx'

 createRoot(document.getElementById('root')).render(
239 src/frontend/src/pages/DashboardPage.jsx Normal file
@@ -0,0 +1,239 @@
import {useEffect, useState} from 'react';
import {useAuth} from '../hooks/useAuth';

const DashboardPage = () => {
  const {user} = useAuth();
  const [stats, setStats] = useState({
    totalDocuments: 0,
    processingJobs: 0,
    completedJobs: 0,
    failedJobs: 0
  });

  const [recentFiles, setRecentFiles] = useState([]);
  const [loading, setLoading] = useState(true);

  useEffect(() => {
    // Simulate API calls for dashboard data
    const fetchDashboardData = async () => {
      try {
        // TODO: Replace with actual API calls
        setTimeout(() => {
          setStats({
            totalDocuments: 42,
            processingJobs: 3,
            completedJobs: 38,
            failedJobs: 1
          });

          setRecentFiles([
            {
              id: 1,
              filename: 'invoice_2024.pdf',
              status: 'completed',
              processedAt: '2024-01-15 14:30:00',
              fileType: 'pdf'
            },
            {
              id: 2,
              filename: 'contract_draft.docx',
              status: 'processing',
              processedAt: '2024-01-15 14:25:00',
              fileType: 'docx'
            },
            {
              id: 3,
              filename: 'receipt_scan.jpg',
              status: 'completed',
              processedAt: '2024-01-15 14:20:00',
              fileType: 'image'
            }
          ]);

          setLoading(false);
        }, 1000);
      } catch (error) {
        console.error('Error fetching dashboard data:', error);
        setLoading(false);
      }
    };

    fetchDashboardData();
  }, []);

  const getStatusBadge = (status) => {
    const statusColors = {
      completed: 'badge-success',
      processing: 'badge-warning',
      failed: 'badge-error',
      pending: 'badge-info'
    };

    return `badge ${statusColors[status] || 'badge-neutral'}`;
  };

  const getFileTypeIcon = (fileType) => {
    const icons = {
      pdf: '📄',
      docx: '📝',
      image: '🖼️',
      txt: '📄'
    };

    return icons[fileType] || '📄';
  };

  if (loading) {
    return (
      <div className="flex justify-center items-center h-64">
        <span className="loading loading-spinner loading-lg"></span>
      </div>
    );
  }

  return (
    <div className="space-y-6">
      {/* Welcome Header */}
      <div className="bg-base-100 rounded-lg shadow p-6">
        <h1 className="text-3xl font-bold text-base-content">
          Welcome back, {user?.username}!
        </h1>
        <p className="text-base-content/60 mt-2">
          Here's your document processing overview
        </p>
      </div>

      {/* Stats Cards */}
      <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-4 gap-4">
        <div className="stat bg-base-100 rounded-lg shadow">
          <div className="stat-figure text-primary">
            <svg className="w-8 h-8" fill="none" stroke="currentColor" viewBox="0 0 24 24">
              <path strokeLinecap="round" strokeLinejoin="round" strokeWidth="2"
                    d="M9 12h6m-6 4h6m2 5H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z"/>
            </svg>
          </div>
          <div className="stat-title">Total Documents</div>
          <div className="stat-value text-primary">{stats.totalDocuments}</div>
        </div>

        <div className="stat bg-base-100 rounded-lg shadow">
          <div className="stat-figure text-warning">
            <svg className="w-8 h-8" fill="none" stroke="currentColor" viewBox="0 0 24 24">
              <path strokeLinecap="round" strokeLinejoin="round" strokeWidth="2"
                    d="M12 8v4l3 3m6-3a9 9 0 11-18 0 9 9 0 0118 0z"/>
            </svg>
          </div>
          <div className="stat-title">Processing</div>
          <div className="stat-value text-warning">{stats.processingJobs}</div>
        </div>

        <div className="stat bg-base-100 rounded-lg shadow">
          <div className="stat-figure text-success">
            <svg className="w-8 h-8" fill="none" stroke="currentColor" viewBox="0 0 24 24">
              <path strokeLinecap="round" strokeLinejoin="round" strokeWidth="2"
                    d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z"/>
            </svg>
          </div>
          <div className="stat-title">Completed</div>
          <div className="stat-value text-success">{stats.completedJobs}</div>
        </div>

        <div className="stat bg-base-100 rounded-lg shadow">
          <div className="stat-figure text-error">
            <svg className="w-8 h-8" fill="none" stroke="currentColor" viewBox="0 0 24 24">
              <path strokeLinecap="round" strokeLinejoin="round" strokeWidth="2"
                    d="M12 8v4m0 4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"/>
            </svg>
          </div>
          <div className="stat-title">Failed</div>
          <div className="stat-value text-error">{stats.failedJobs}</div>
        </div>
      </div>

      {/* Recent Files */}
      <div className="bg-base-100 rounded-lg shadow">
        <div className="p-6 border-b border-base-300">
          <h2 className="text-xl font-semibold">Recent Files</h2>
        </div>
        <div className="overflow-x-auto">
          <table className="table table-zebra">
            <thead>
              <tr>
                <th>File</th>
                <th>Type</th>
                <th>Status</th>
                <th>Processed At</th>
                <th>Actions</th>
              </tr>
            </thead>
            <tbody>
              {recentFiles.map((file) => (
                <tr key={file.id}>
                  <td>
                    <div className="flex items-center space-x-3">
                      <div className="text-2xl">
                        {getFileTypeIcon(file.fileType)}
                      </div>
                      <div className="font-medium">{file.filename}</div>
                    </div>
                  </td>
                  <td>
                    <span className="badge badge-outline">
                      {file.fileType.toUpperCase()}
                    </span>
                  </td>
                  <td>
                    <span className={getStatusBadge(file.status)}>
                      {file.status.charAt(0).toUpperCase() + file.status.slice(1)}
                    </span>
                  </td>
                  <td>{file.processedAt}</td>
                  <td>
                    <div className="flex space-x-2">
                      <button className="btn btn-sm btn-ghost">View</button>
                      <button className="btn btn-sm btn-ghost">Download</button>
                    </div>
                  </td>
                </tr>
              ))}
            </tbody>
          </table>
        </div>
      </div>

      {/* Quick Actions */}
      <div className="bg-base-100 rounded-lg shadow p-6">
        <h2 className="text-xl font-semibold mb-4">Quick Actions</h2>
        <div className="flex flex-wrap gap-4">
          <button className="btn btn-primary">
            <svg className="w-5 h-5 mr-2" fill="none" stroke="currentColor" viewBox="0 0 24 24">
              <path strokeLinecap="round" strokeLinejoin="round" strokeWidth="2"
                    d="M7 16a4 4 0 01-.88-7.903A5 5 0 1115.9 6L16 6a5 5 0 011 9.9M15 13l-3-3m0 0l-3 3m3-3v12"/>
            </svg>
            Upload Documents
          </button>

          <button className="btn btn-outline">
            <svg className="w-5 h-5 mr-2" fill="none" stroke="currentColor" viewBox="0 0 24 24">
              <path strokeLinecap="round" strokeLinejoin="round" strokeWidth="2"
                    d="M9 17v-2m3 2v-4m3 4v-6m2 10H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z"/>
            </svg>
            View Reports
          </button>

          {user?.role === 'admin' && (
            <button className="btn btn-outline">
              <svg className="w-5 h-5 mr-2" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                <path strokeLinecap="round" strokeLinejoin="round" strokeWidth="2"
                      d="M12 4.354a4 4 0 110 5.292M15 21H3v-1a6 6 0 0112 0v1zm0 0h6v-1a6 6 0 00-9-5.197m13.5-9a2.5 2.5 0 11-5 0 2.5 2.5 0 015 0z"/>
              </svg>
              Manage Users
            </button>
          )}
        </div>
      </div>
    </div>
  );
};

export default DashboardPage;
21 src/frontend/src/pages/DocumentsPage.jsx Normal file
@@ -0,0 +1,21 @@
/**
 * DocumentsPage Component
 * Main page for displaying and managing documents
 */

import React from 'react';
import DocumentGallery from '../components/documents/DocumentGallery';

/**
 * DocumentsPage component
 * @returns {JSX.Element}
 */
const DocumentsPage = () => {
  return (
    <div className="h-full flex flex-col">
      <DocumentGallery />
    </div>
  );
};

export default DocumentsPage;
48 src/frontend/src/pages/LoginPage.jsx Normal file
@@ -0,0 +1,48 @@
import React, {useEffect} from 'react';
import {useNavigate} from 'react-router-dom';
import {useAuth} from '../contexts/AuthContext';
import AuthLayout from '../components/auth/AuthLayout';
import LoginForm from '../components/auth/LoginForm';

/**
 * LoginPage component
 * Full page component that handles login functionality and redirects
 */
function LoginPage() {
  const {isAuthenticated, loading} = useAuth();
  const navigate = useNavigate();

  // Redirect to dashboard if already authenticated
  useEffect(() => {
    if (!loading && isAuthenticated) {
      navigate('/dashboard', {replace: true});
    }
  }, [isAuthenticated, loading, navigate]);

  // Show loading spinner while checking authentication
  if (loading) {
    return (
      <AuthLayout>
        <div className="card w-full max-w-md shadow-xl bg-base-100">
          <div className="card-body items-center">
            <span className="loading loading-spinner loading-lg text-primary"></span>
            <p className="text-base-content/70 mt-4">Loading...</p>
          </div>
        </div>
      </AuthLayout>
    );
  }

  // Don't render the login form if the user is authenticated (prevents a flash)
  if (isAuthenticated) {
    return null;
  }

  return (
    <AuthLayout>
      <LoginForm/>
    </AuthLayout>
  );
}

export default LoginPage;
101 src/frontend/src/services/authService.js Normal file
@@ -0,0 +1,101 @@
import api from '../utils/api';

/**
 * Authentication service for handling login, logout, and user profile operations
 */
class AuthService {
  /**
   * Login user with username and password
   * @param {string} username - User's username
   * @param {string} password - User's password
   * @returns {Promise<{access_token: string, user: Object}>} Login response with token and user data
   */
  async login(username, password) {
    try {
      // FastAPI expects form data for OAuth2PasswordRequestForm
      const formData = new FormData();
      formData.append('username', username);
      formData.append('password', password);

      const response = await api.post('/auth/login', formData, {
        headers: {
          'Content-Type': 'multipart/form-data',
        },
      });

      const {access_token, user} = response.data;

      // Store token and user data in localStorage
      localStorage.setItem('access_token', access_token);
      localStorage.setItem('user', JSON.stringify(user));

      return {access_token, user};
    } catch (error) {
      // Extract error message from response
      const errorMessage = error.response?.data?.detail || 'Login failed';
      throw new Error(errorMessage);
    }
  }

  /**
   * Logout user by clearing stored data
   */
  logout() {
    localStorage.removeItem('access_token');
    localStorage.removeItem('user');
  }

  /**
   * Get current user profile from API
   * @returns {Promise<Object>} Current user profile
   */
  async getCurrentUser() {
    try {
      const response = await api.get('/auth/me');
      const user = response.data;

      // Update stored user data
      localStorage.setItem('user', JSON.stringify(user));

      return user;
    } catch (error) {
      const errorMessage = error.response?.data?.detail || 'Failed to get user profile';
      throw new Error(errorMessage);
    }
  }

  /**
   * Check if user is authenticated by verifying token existence
   * @returns {boolean} True if a token is stored (existence only, not validity)
   */
  isAuthenticated() {
    const token = localStorage.getItem('access_token');
    return !!token;
  }

  /**
   * Get stored user data from localStorage
   * @returns {Object|null} User data or null if not found
   */
  getStoredUser() {
    try {
      const userStr = localStorage.getItem('user');
      return userStr ? JSON.parse(userStr) : null;
    } catch (error) {
      console.error('Error parsing stored user data:', error);
      return null;
    }
  }

  /**
   * Get stored access token from localStorage
   * @returns {string|null} Access token or null if not found
   */
  getStoredToken() {
    return localStorage.getItem('access_token');
  }
}

// Export singleton instance
const authService = new AuthService();
export default authService;
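For context on the `FormData` comment in `login()`: FastAPI's `OAuth2PasswordRequestForm` reads `username` and `password` as form fields, which is why the client does not send a JSON body here. A minimal sketch of the backend counterpart, assuming illustrative helper names `authenticate` and `create_access_token` (the real `/auth/login` route is not part of this diff):

# Hedged sketch of the FastAPI route this service talks to; the actual
# implementation lives in the app/api package, which this diff does not show.
from fastapi import APIRouter, Depends, HTTPException, status
from fastapi.security import OAuth2PasswordRequestForm

router = APIRouter(prefix="/auth")


@router.post("/login")
async def login(form_data: OAuth2PasswordRequestForm = Depends()):
    # OAuth2PasswordRequestForm parses form fields, matching the FormData
    # body that authService.login() sends.
    user = authenticate(form_data.username, form_data.password)  # hypothetical helper
    if user is None:
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED)
    token = create_access_token(user)  # hypothetical helper
    # Shape matches what authService.login destructures: {access_token, user}
    return {"access_token": token, "token_type": "bearer", "user": user}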
97 src/frontend/src/services/documentService.js Normal file
@@ -0,0 +1,97 @@
/**
 * Document Service
 * Handles all API calls related to documents
 * Currently using mock data for development
 */

import { mockDocuments, availableTags, availableCategories } from '../utils/mockData';
import api from '../utils/api';

// Simulate network delay
const delay = (ms) => new Promise(resolve => setTimeout(resolve, ms));

/**
 * Fetches all documents from the API
 * @returns {Promise<Array>} Array of document objects
 */
export const getAllDocuments = async () => {
  try {
    const response = await api.get('/api/documents');
    return response.data;
  } catch (error) {
    console.error('Failed to fetch documents:', error);
    // Fall back to mock data in case of API error during development
    console.warn('Falling back to mock data');
    return [...mockDocuments];
  }
};

/**
 * Fetches a single document by ID
 * @param {string} id - Document ID
 * @returns {Promise<Object|null>} Document object or null if not found
 */
export const getDocumentById = async (id) => {
  await delay(300);
  const document = mockDocuments.find(doc => doc.id === id);
  return document || null;
};

/**
 * Updates a document's tags and categories
 * @param {string} id - Document ID
 * @param {Object} updates - Object containing tags and/or categories
 * @param {Array<string>} updates.tags - New tags array
 * @param {Array<string>} updates.categories - New categories array
 * @returns {Promise<Object>} Updated document object
 */
export const updateDocument = async (id, updates) => {
  await delay(400);

  const index = mockDocuments.findIndex(doc => doc.id === id);
  if (index === -1) {
    throw new Error('Document not found');
  }

  // Update the document
  mockDocuments[index] = {
    ...mockDocuments[index],
    ...updates
  };

  return mockDocuments[index];
};

/**
 * Deletes a document
 * @param {string} id - Document ID
 * @returns {Promise<boolean>} True if deletion was successful
 */
export const deleteDocument = async (id) => {
  await delay(300);

  const index = mockDocuments.findIndex(doc => doc.id === id);
  if (index === -1) {
    throw new Error('Document not found');
  }

  mockDocuments.splice(index, 1);
  return true;
};

/**
 * Gets all available tags
 * @returns {Promise<Array<string>>} Array of tag strings
 */
export const getAvailableTags = async () => {
  await delay(200);
  return [...availableTags];
};

/**
 * Gets all available categories
 * @returns {Promise<Array<string>>} Array of category strings
 */
export const getAvailableCategories = async () => {
  await delay(200);
  return [...availableCategories];
};
57 src/frontend/src/utils/api.js Normal file
@@ -0,0 +1,57 @@
import axios from 'axios';

// Base API configuration
const API_BASE_URL = 'http://localhost:8000';

// Create axios instance with default configuration
const api = axios.create({
  baseURL: API_BASE_URL,
  timeout: 10000, // 10 seconds timeout
  headers: {
    'Content-Type': 'application/json',
  },
});

export { API_BASE_URL };

// Request interceptor to add the authentication token
api.interceptors.request.use(
  (config) => {
    // Get token from localStorage
    const token = localStorage.getItem('access_token');
    if (token) {
      config.headers.Authorization = `Bearer ${token}`;
    }
    return config;
  },
  (error) => {
    return Promise.reject(error);
  }
);

// Response interceptor to handle common errors
api.interceptors.response.use(
  (response) => {
    return response;
  },
  (error) => {
    // Handle 401 errors (unauthorized)
    if (error.response?.status === 401) {
      // Clear token from localStorage on 401
      localStorage.removeItem('access_token');
      localStorage.removeItem('user');

      // Redirect to login page
      window.location.href = '/login';
    }

    // Handle other common errors
    if (error.response?.status >= 500) {
      console.error('Server error:', error.response.data);
    }

    return Promise.reject(error);
  }
);

export default api;
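The request interceptor attaches `Authorization: Bearer <token>` to every call, and the response interceptor treats any 401 as a stale session. A hedged sketch of the server-side counterpart that would produce those 401s, assuming a PyJWT-based dependency (PyJWT is pinned in requirements.txt; `SECRET_KEY`, `ALGORITHM`, and `load_user` are illustrative names, not taken from this diff):

import jwt
from fastapi import Depends, HTTPException, status
from fastapi.security import OAuth2PasswordBearer

oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/auth/login")

SECRET_KEY = "change-me"  # assumption: the real value would come from app.config.settings
ALGORITHM = "HS256"


async def get_current_user(token: str = Depends(oauth2_scheme)):
    try:
        payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
    except jwt.PyJWTError:
        # A 401 here is what triggers the axios response interceptor to
        # clear localStorage and redirect the browser to /login.
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED)
    return load_user(payload["sub"])  # hypothetical lookup helper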
155 src/frontend/src/utils/mockData.js Normal file
@@ -0,0 +1,155 @@
/**
 * Mock data for PDF documents
 * This file provides sample data for development and testing purposes
 */

/**
 * Generates a placeholder thumbnail URL
 * @param {number} index - Document index for unique colors
 * @returns {string} Placeholder image URL
 */
const generateThumbnailUrl = (index) => {
  const colors = ['3B82F6', '10B981', 'F59E0B', 'EF4444', '8B5CF6', 'EC4899'];
  const color = colors[index % colors.length];
  return `https://via.placeholder.com/300x400/${color}/FFFFFF?text=Page+1`;
};

/**
 * Mock documents data
 * @type {Array<Object>}
 */
export const mockDocuments = [
  {
    id: 'doc-001',
    name: 'Contrat-2025.pdf',
    originalFileType: 'DOCX',
    createdAt: '2025-10-01T10:30:00Z',
    fileSize: 2048576, // 2 MB
    pageCount: 12,
    thumbnailUrl: generateThumbnailUrl(0),
    pdfUrl: '/mock/contrat-2025.pdf',
    tags: ['contrat', '2025'],
    categories: ['legal']
  },
  {
    id: 'doc-002',
    name: 'Facture-Janvier.pdf',
    originalFileType: 'XLSX',
    createdAt: '2025-09-15T14:20:00Z',
    fileSize: 512000, // 512 KB
    pageCount: 3,
    thumbnailUrl: generateThumbnailUrl(1),
    pdfUrl: '/mock/facture-janvier.pdf',
    tags: ['facture', 'comptabilité'],
    categories: ['finance']
  },
  {
    id: 'doc-003',
    name: 'Présentation-Projet.pdf',
    originalFileType: 'PPTX',
    createdAt: '2025-09-28T09:15:00Z',
    fileSize: 5242880, // 5 MB
    pageCount: 24,
    thumbnailUrl: generateThumbnailUrl(2),
    pdfUrl: '/mock/presentation-projet.pdf',
    tags: ['présentation', 'projet'],
    categories: ['marketing']
  },
  {
    id: 'doc-004',
    name: 'Photo-Identité.pdf',
    originalFileType: 'JPG',
    createdAt: '2025-10-05T16:45:00Z',
    fileSize: 204800, // 200 KB
    pageCount: 1,
    thumbnailUrl: generateThumbnailUrl(3),
    pdfUrl: '/mock/photo-identite.pdf',
    tags: ['photo', 'identité'],
    categories: ['personnel']
  },
  {
    id: 'doc-005',
    name: 'Manuel-Utilisateur.pdf',
    originalFileType: 'PDF',
    createdAt: '2025-09-20T11:00:00Z',
    fileSize: 3145728, // 3 MB
    pageCount: 45,
    thumbnailUrl: generateThumbnailUrl(4),
    pdfUrl: '/mock/manuel-utilisateur.pdf',
    tags: ['manuel', 'documentation'],
    categories: ['technique']
  },
  {
    id: 'doc-006',
    name: 'Rapport-Annuel.pdf',
    originalFileType: 'DOCX',
    createdAt: '2025-08-30T13:30:00Z',
    fileSize: 4194304, // 4 MB
    pageCount: 67,
    thumbnailUrl: generateThumbnailUrl(5),
    pdfUrl: '/mock/rapport-annuel.pdf',
    tags: ['rapport', 'annuel'],
    categories: ['finance', 'management']
  },
  {
    id: 'doc-007',
    name: 'CV-Candidat.pdf',
    originalFileType: 'DOCX',
    createdAt: '2025-10-02T08:00:00Z',
    fileSize: 153600, // 150 KB
    pageCount: 2,
    thumbnailUrl: generateThumbnailUrl(0),
    pdfUrl: '/mock/cv-candidat.pdf',
    tags: ['cv', 'recrutement'],
    categories: ['rh']
  },
  {
    id: 'doc-008',
    name: 'Devis-Travaux.pdf',
    originalFileType: 'XLSX',
    createdAt: '2025-09-25T15:20:00Z',
    fileSize: 409600, // 400 KB
    pageCount: 5,
    thumbnailUrl: generateThumbnailUrl(1),
    pdfUrl: '/mock/devis-travaux.pdf',
    tags: ['devis', 'travaux'],
    categories: ['finance']
  }
];

/**
 * Available tags for documents
 * @type {Array<string>}
 */
export const availableTags = [
  'contrat',
  'facture',
  'présentation',
  'photo',
  'manuel',
  'rapport',
  'cv',
  'devis',
  'comptabilité',
  'projet',
  'identité',
  'documentation',
  'annuel',
  'recrutement',
  'travaux',
  '2025'
];

/**
 * Available categories for documents
 * @type {Array<string>}
 */
export const availableCategories = [
  'legal',
  'finance',
  'marketing',
  'personnel',
  'technique',
  'management',
  'rh'
];
15 src/frontend/tailwind.config.js Normal file
@@ -0,0 +1,15 @@
/** @type {import('tailwindcss').Config} */
export default {
  content: [
    "./index.html",
    "./src/**/*.{js,ts,jsx,tsx}",
  ],
  theme: {
    extend: {},
  },
  plugins: [require("daisyui")],
  daisyui: {
    themes: ["light", "dark", "cupcake"],
    darkTheme: "dark",
  },
}
@@ -1,7 +1,8 @@
-import { defineConfig } from 'vite'
+import {defineConfig} from 'vite'
+import tailwindcss from '@tailwindcss/vite'
 import react from '@vitejs/plugin-react'

 // https://vite.dev/config/
 export default defineConfig({
-  plugins: [react()],
+  plugins: [tailwindcss(), react()],
 })
@@ -3,12 +3,26 @@ FROM python:3.12-slim
 # Set working directory
 WORKDIR /app

+# Install libmagic and the document-conversion toolchain
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    libmagic1 \
+    file \
+    pandoc \
+    ghostscript \
+    texlive-xetex \
+    && rm -rf /var/lib/apt/lists/*
+
 # Copy requirements and install dependencies
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt

+# Change the user
+USER 1002:1002
+
 # Copy application code
-COPY tasks/ .
+COPY . .

 # Command will be overridden by docker-compose
 CMD ["celery", "-A", "main", "worker", "--loglevel=info"]
@@ -1,4 +1,20 @@
+asgiref==3.9.1
+bcrypt==4.3.0
 celery==5.5.3
+email-validator==2.3.0
+fastapi==0.116.1
+httptools==0.6.4
+motor==3.7.1
+pikepdf==9.11.0
+pillow==11.3.0
+pydantic==2.11.9
+PyJWT==2.10.1
+pymongo==4.15.0
+PyMuPDF==1.26.4
+pypandoc==1.15
+python-multipart==0.0.20
 redis==6.4.0
-pymongo==4.15.0
+reportlab==4.4.4
+uvicorn==0.35.0
+python-magic==0.4.27
+watchdog==6.0.0
0 src/worker/tasks/common/__init__.py Normal file
73 src/worker/tasks/common/converter_utils.py Normal file
@@ -0,0 +1,73 @@
import subprocess
from pathlib import Path

import magic  # python-magic


class UnsupportedFileTypeError(Exception):
    """Exception raised when a file type is not supported."""
    pass


def detect_file_type(file_path: str) -> str:
    """
    Detect the type of a file using python-magic.

    Returns:
        One of 'text', 'image', 'word', 'pdf' or 'powerpoint'.

    Raises:
        UnsupportedFileTypeError: If the file type is not supported.
    """
    mime = magic.from_file(file_path, mime=True)
    extension = Path(file_path).suffix
    if mime.startswith("text/"):
        return "text"
    elif mime.startswith("image/"):
        return "image"
    elif mime in ("application/vnd.openxmlformats-officedocument.wordprocessingml.document",):
        return "word"
    elif mime == "application/pdf":
        return "pdf"
    elif mime == "application/vnd.ms-powerpoint":
        return "powerpoint"
    elif mime == "application/octet-stream" and extension in (".jpg", ".jpeg", ".png", ".gif"):
        return "image"
    else:
        raise UnsupportedFileTypeError(f"Unsupported file type: {mime}")


def compress_pdf(input_pdf: str, output_pdf: str, quality: str = "ebook") -> None:
    """
    Compress a PDF using Ghostscript.

    Args:
        input_pdf (str): Path to the input PDF.
        output_pdf (str): Path to save the compressed PDF.
        quality (str): Ghostscript PDFSETTINGS option: screen, ebook, printer, prepress.

    Raises:
        FileNotFoundError: If the input PDF does not exist.
        RuntimeError: If Ghostscript returns an error.
    """
    input_path = Path(input_pdf)
    output_path = Path(output_pdf)

    if not input_path.exists():
        raise FileNotFoundError(f"Input PDF not found: {input_pdf}")

    cmd = [
        "gs",
        "-sDEVICE=pdfwrite",
        "-dCompatibilityLevel=1.4",
        f"-dPDFSETTINGS=/{quality}",
        "-dNOPAUSE",
        "-dQUIET",
        "-dBATCH",
        f"-sOutputFile={str(output_path)}",
        str(input_path),
    ]

    result = subprocess.run(cmd)
    if result.returncode != 0:
        raise RuntimeError(f"Ghostscript failed with return code {result.returncode}")
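A quick usage sketch of these helpers (file paths are illustrative, and the import path assumes the src/worker/tasks layout used by the worker image):

from tasks.common.converter_utils import (
    UnsupportedFileTypeError, compress_pdf, detect_file_type,
)

try:
    kind = detect_file_type("/watch/incoming/report.docx")  # e.g. "word"
    if kind == "pdf":
        # "ebook" trades some image quality for a smaller output file
        compress_pdf("/watch/incoming/report.pdf", "/tmp/report-small.pdf", quality="ebook")
except UnsupportedFileTypeError as exc:
    print(f"Skipping file: {exc}")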
64 src/worker/tasks/common/document_utils.py Normal file
@@ -0,0 +1,64 @@
import hashlib
import logging
import os
from pathlib import Path

from app.config import settings

logger = logging.getLogger(__name__)


def get_file_hash(file_bytes: bytes) -> str:
    """
    Calculate SHA256 hash of file content.

    Args:
        file_bytes: Raw file content as bytes

    Returns:
        Hexadecimal SHA256 hash string
    """
    return hashlib.sha256(file_bytes).hexdigest()


def get_object_path(file_hash):
    """
    Build the on-disk storage path for an object from its hash.

    :param file_hash: SHA256 hex digest of the file content
    :return: path under the objects folder, sharded by the first 24 hash characters
    """
    root = settings.get_objects_folder()
    return os.path.join(root, file_hash[:24], file_hash)


def save_as_object(file_path, remove_on_success=True) -> str:
    """
    Read the file, get the hash and save using the hash as the filename.

    :param file_path: path of the file to store
    :param remove_on_success: remove the source file once it has been stored
    :return: hash of the file
    """
    logger.info(f"Saving file {file_path} as object")
    path = Path(file_path)
    as_bytes = path.read_bytes()

    file_hash = get_file_hash(as_bytes)
    logger.info(f"File hash: {file_hash}")

    object_path = get_object_path(file_hash)
    if os.path.exists(object_path):
        logger.info(f"Object already exists: {object_path}")
        return file_hash

    if not os.path.exists(os.path.dirname(object_path)):
        os.makedirs(os.path.dirname(object_path))

    logger.info(f"Saving object to: {object_path}")
    with open(object_path, "wb") as f:
        f.write(as_bytes)

    if remove_on_success:
        logger.info(f"Removing file: {file_path}")
        path.unlink()

    return file_hash
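The object store is content-addressable: the SHA256 digest is both the identity and the on-disk name, so duplicate uploads collapse to a single stored object. A small sketch, assuming two files with identical bytes exist at the illustrative paths:

from tasks.common.document_utils import get_file_hash, save_as_object

data = b"%PDF-1.4 ..."                 # any file content
print(get_file_hash(data))             # deterministic SHA256 hex digest

h1 = save_as_object("/tmp/a.pdf", remove_on_success=False)
h2 = save_as_object("/tmp/copy-of-a.pdf", remove_on_success=False)
assert h1 == h2  # identical bytes dedupe to one object on disk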
119 src/worker/tasks/document_processing.py Normal file
@@ -0,0 +1,119 @@
"""
Celery tasks for document processing with ProcessingJob status management.

This module contains Celery tasks that handle document content extraction
and update processing job statuses throughout the task lifecycle.
"""

import logging
import os
from typing import Any, Dict

from app.config import settings
from app.database.connection import get_database
from app.models.job import ProcessingStatus
from app.services.document_service import DocumentService, DocumentAlreadyExists
from app.services.job_service import JobService
from tasks.main import celery_app

logger = logging.getLogger(__name__)


def get_services():
    database = get_database()
    document_service = DocumentService(database=database, objects_folder=settings.get_objects_folder())
    job_service = JobService(database=database)
    return document_service, job_service


# @celery_app.task(bind=True, autoretry_for=(Exception,), retry_kwargs={'max_retries': 3, 'countdown': 60})
@celery_app.task(bind=True)
def process_document(self, filepath: str) -> Dict[str, Any]:
    """
    Process a document file and extract its content.

    This task:
    1. Creates the document and a processing job, then marks the job as started
    2. Generates the associated PDF and thumbnail
    3. Updates the job status to COMPLETED or FAILED based on the result

    Args:
        self: Celery task instance
        filepath: Full path to the document file to process

    Returns:
        Dictionary containing processing results

    Raises:
        Exception: Any processing error (will trigger a retry once autoretry is enabled)
    """
    task_id = self.request.id
    logger.info(f'Task {task_id} : Starting document processing for file: "{filepath}"')

    # Get services
    document_service, job_service = get_services()

    job = None
    document = None
    try:
        # Step 1: Create the document and a new job record for the document
        document = document_service.create_document(filepath)
        job = job_service.create_job(task_id=task_id, document_id=document.id)
        job_service.mark_job_as_started(job_id=job.id)
        logger.info(f'Task {task_id} : Created document "{document.id}". Started job "{job.id}"')

        # Step 2: Create the associated PDF
        logger.info(f"Task {task_id} : Creating associated PDF")
        job_service.update_job_status(job_id=job.id, status=ProcessingStatus.SAVING_PDF)
        document_service.create_pdf(document.id)

        # Step 3: Create the thumbnail
        logger.info(f"Task {task_id} : Creating thumbnail")
        job_service.update_job_status(job_id=job.id, status=ProcessingStatus.CREATING_THUMBNAIL)
        document_service.create_thumbnail(document.id)

        # Remove the file from the watch folder
        os.remove(filepath)

        # Final step: Mark the job as completed
        job_service.mark_job_as_completed(job_id=job.id)
        logger.info(f"Task {task_id} marked as COMPLETED")

        return {
            "task_id": task_id,
            "filepath": filepath,
            "status": "completed",
        }

    except DocumentAlreadyExists as e:
        logger.info(f"Task {task_id} completed: {str(e)}")
        if job is not None:
            job_service.mark_job_as_completed(job_id=job.id)
            logger.info(f"Job {task_id} marked as COMPLETED")

        return {
            "task_id": task_id,
            "filepath": filepath,
            "status": "completed",
            "message": str(e),
        }

    except Exception as e:
        error_message = f"Document processing failed: {str(e)}"
        logger.error(f"Task {task_id} failed: {error_message}")

        try:
            # Mark the job as failed
            if job is not None:
                job_service.mark_job_as_failed(job_id=job.id, error_message=error_message)
                logger.info(f"Job {task_id} marked as FAILED")
            else:
                logger.error(f"Failed to process {filepath}. error = {str(e)}")

            if document is not None:
                document_service.move_to_errors(document.id, filepath)
                logger.info(f"Moved file {filepath} to errors/{document.id}")

        except Exception as job_error:
            logger.error(f"Failed to update job status for task {task_id}: {str(job_error)}")

        # Re-raise the exception to trigger the Celery retry mechanism
        raise
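Because `process_document` is a bound Celery task, callers enqueue it rather than invoking it directly. A minimal dispatch sketch (the watch-folder path is illustrative):

from tasks.document_processing import process_document

result = process_document.delay("/watch/incoming/invoice.pdf")  # async enqueue
print(result.id)           # the Celery task id, also stored on the ProcessingJob
# result.get(timeout=120)  # would block until the worker returns the result dict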
@@ -3,24 +3,31 @@ Celery worker for MyDocManager document processing tasks.

 This module contains all Celery tasks for processing documents.
 """
+import logging
 import os
-import time
+
 from celery import Celery
+from celery.signals import worker_process_init
+
+from app.config import settings
+
 # Environment variables
-REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/0")
-MONGODB_URL = os.getenv("MONGODB_URL", "mongodb://localhost:27017")
+REDIS_URL = settings.get_redis_url()
+MONGODB_URL = settings.get_mongodb_url()
+
+logger = logging.getLogger(__name__)

 # Initialize Celery app
-app = Celery(
+celery_app = Celery(
     "mydocmanager_worker",
     broker=REDIS_URL,
-    backend=REDIS_URL
+    backend=REDIS_URL,
 )
+
+celery_app.autodiscover_tasks(["tasks.document_processing"])

 # Celery configuration
-app.conf.update(
+celery_app.conf.update(
     task_serializer="json",
     accept_content=["json"],
     result_serializer="json",
@@ -32,82 +39,15 @@ app.conf.update(
 )


-@app.task(bind=True)
-def test_task(self, message: str):
-    """
-    Test task for validating worker functionality.
-
-    Args:
-        message: Test message to process
-
-    Returns:
-        dict: Task result with processing information
-    """
-    try:
-        print(f"[WORKER] Starting test task with message: {message}")
-
-        # Simulate some work
-        for i in range(5):
-            print(f"[WORKER] Processing step {i + 1}/5...")
-            time.sleep(1)
-
-            # Update task progress
-            self.update_state(
-                state="PROGRESS",
-                meta={
-                    "current": i + 1,
-                    "total": 5,
-                    "message": f"Processing step {i + 1}"
-                }
-            )
-
-        result = {
-            "status": "completed",
-            "message": f"Successfully processed: {message}",
-            "processed_at": time.time(),
-            "worker_id": self.request.id
-        }
-
-        print(f"[WORKER] Test task completed successfully: {result}")
-        return result
-
-    except Exception as exc:
-        print(f"[WORKER] Test task failed: {str(exc)}")
-        raise self.retry(exc=exc, countdown=60, max_retries=3)
-
-
-@app.task(bind=True)
-def process_document_task(self, file_path: str):
-    """
-    Placeholder task for document processing.
-
-    Args:
-        file_path: Path to the document to process
-
-    Returns:
-        dict: Processing result
-    """
-    try:
-        print(f"[WORKER] Starting document processing for: {file_path}")
-
-        # Placeholder for document processing logic
-        time.sleep(2)  # Simulate processing time
-
-        result = {
-            "status": "completed",
-            "file_path": file_path,
-            "processed_at": time.time(),
-            "content": f"Placeholder content for {file_path}",
-            "worker_id": self.request.id
-        }
-
-        print(f"[WORKER] Document processing completed: {file_path}")
-        return result
-
-    except Exception as exc:
-        print(f"[WORKER] Document processing failed for {file_path}: {str(exc)}")
-        raise self.retry(exc=exc, countdown=60, max_retries=3)
+def global_init(**kwargs):
+    """Initialize global variables."""
+    logger.info(f"{'*' * 45}")
+    logger.info(f"{'--' * 5}" + " Starting MyDocManager worker " + f"{'--' * 5}")
+    logger.info(f"{'*' * 45}")
+
+
+global_init()


 if __name__ == "__main__":
-    app.start()
+    global_init()
+    celery_app.start()
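The pinned `watchdog==6.0.0` and the "watch folder" references imply a file-watcher feeding this worker. The watcher service itself is not part of this diff, so the following is only a hedged sketch of how new files could be handed to the task:

import time

from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer

from tasks.document_processing import process_document


class NewFileHandler(FileSystemEventHandler):
    def on_created(self, event):
        if not event.is_directory:
            # Hand the new file to the Celery worker via the Redis broker
            process_document.delay(event.src_path)


observer = Observer()
observer.schedule(NewFileHandler(), "/watch", recursive=False)  # path is illustrative
observer.start()
try:
    while True:
        time.sleep(1)
finally:
    observer.stop()
    observer.join()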
0 tests/api/__init__.py Normal file
149 tests/api/test_auth_routes.py Normal file
@@ -0,0 +1,149 @@
from datetime import datetime
from unittest.mock import MagicMock

import pytest
from fastapi import status, HTTPException
from fastapi.testclient import TestClient
from mongomock.mongo_client import MongoClient

from app.api.dependencies import get_auth_service, get_user_service, get_current_user
from app.main import app  # Assuming you have FastAPI app defined in app/main.py
from app.models.auth import UserRole
from app.models.types import PyObjectId
from app.models.user import UserInDB
from app.services.auth_service import AuthService
from app.services.user_service import UserService


@pytest.fixture
def client():
    return TestClient(app)


@pytest.fixture
def fake_user():
    return UserInDB(
        _id=PyObjectId(),
        username="testuser",
        email="test@example.com",
        role=UserRole.USER,
        is_active=True,
        hashed_password="hashed-secret",
        created_at=datetime(2025, 1, 1),
        updated_at=datetime(2025, 1, 2),
    )


def override_auth_service():
    mock = MagicMock(spec=AuthService)
    mock.verify_user_password.return_value = True
    mock.create_access_token.return_value = "fake-jwt-token"
    return mock


def override_user_service(fake_user):
    mock = MagicMock(spec=UserService)
    mock.get_user_by_username.return_value = fake_user
    return mock


def override_get_current_user(fake_user):
    def _override():
        return fake_user

    return _override


def override_get_database():
    def _override():
        client = MongoClient()
        db = client.test_database
        return db

    return _override


# ---------------------- TESTS FOR /auth/login ----------------------
class TestLogin:
    def test_i_can_login_with_valid_credentials(self, client, fake_user):
        auth_service = override_auth_service()
        user_service = override_user_service(fake_user)

        client.app.dependency_overrides[get_auth_service] = lambda: auth_service
        client.app.dependency_overrides[get_user_service] = lambda: user_service

        response = client.post(
            "/auth/login",
            data={"username": "testuser", "password": "secret"},
        )

        assert response.status_code == status.HTTP_200_OK
        data = response.json()
        assert "access_token" in data
        assert data["user"]["username"] == "testuser"

    def test_i_cannot_login_with_invalid_username(self, client):
        auth_service = override_auth_service()
        user_service = MagicMock(spec=UserService)
        user_service.get_user_by_username.return_value = None

        client.app.dependency_overrides[get_auth_service] = lambda: auth_service
        client.app.dependency_overrides[get_user_service] = lambda: user_service

        response = client.post(
            "/auth/login",
            data={"username": "unknown", "password": "secret"},
        )

        assert response.status_code == status.HTTP_401_UNAUTHORIZED

    def test_i_cannot_login_with_inactive_user(self, client, fake_user):
        fake_user.is_active = False
        auth_service = override_auth_service()
        user_service = override_user_service(fake_user)
        client.app.dependency_overrides[get_auth_service] = lambda: auth_service
        client.app.dependency_overrides[get_user_service] = lambda: user_service

        response = client.post(
            "/auth/login",
            data={"username": "testuser", "password": "secret"},
        )

        assert response.status_code == status.HTTP_401_UNAUTHORIZED

    def test_i_cannot_login_with_wrong_password(self, client, fake_user):
        auth_service = override_auth_service()
        auth_service.verify_user_password.return_value = False
        user_service = override_user_service(fake_user)
        client.app.dependency_overrides[get_auth_service] = lambda: auth_service
        client.app.dependency_overrides[get_user_service] = lambda: user_service

        response = client.post(
            "/auth/login",
            data={"username": "testuser", "password": "wrong"},
        )

        assert response.status_code == status.HTTP_401_UNAUTHORIZED


# ---------------------- TESTS FOR /auth/me ----------------------
class TestMe:
    def test_i_can_get_current_user_profile(self, client, fake_user):
        client.app.dependency_overrides[get_current_user] = override_get_current_user(fake_user)

        response = client.get("/auth/me")

        assert response.status_code == status.HTTP_200_OK
        data = response.json()
        assert data["username"] == fake_user.username
        assert data["email"] == fake_user.email

    def test_i_cannot_get_profile_without_authentication(self, client):
        def raise_http_exception():
            raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED)

        client.app.dependency_overrides[get_current_user] = raise_http_exception

        response = client.get("/auth/me")

        assert response.status_code == status.HTTP_401_UNAUTHORIZED
167
tests/api/test_users.py
Normal file
167
tests/api/test_users.py
Normal file
@@ -0,0 +1,167 @@
# File: tests/api/test_users.py
from datetime import datetime
from unittest.mock import MagicMock

import pytest
from fastapi import status
from fastapi.testclient import TestClient

from app.api.dependencies import get_admin_user, get_user_service
from app.main import app
from app.models.auth import UserRole
from app.models.types import PyObjectId
from app.models.user import UserInDB, UserCreate
from app.services.user_service import UserService


# -----------------------
# Fixtures
# -----------------------

@pytest.fixture
def fake_user_admin():
    return UserInDB(
        _id=PyObjectId(),
        username="admin",
        email="admin@example.com",
        role=UserRole.ADMIN,
        is_active=True,
        hashed_password="hashed-secret",
        created_at=datetime(2025, 1, 1),
        updated_at=datetime(2025, 1, 2),
    )


@pytest.fixture
def fake_user_response():
    return UserInDB(
        _id=PyObjectId(),
        username="other",
        email="other@example.com",
        role=UserRole.USER,
        is_active=True,
        hashed_password="hashed-secret-2",
        created_at=datetime(2025, 1, 1),
        updated_at=datetime(2025, 1, 2),
    )


@pytest.fixture
def client(fake_user_admin):
    # Fake admin dependency
    def get_admin_user_override():
        return fake_user_admin

    # Fake user service
    user_service_mock = MagicMock(spec=UserService)

    def get_user_service_override():
        return user_service_mock

    client = TestClient(app)
    client.app.dependency_overrides = {
        get_admin_user: get_admin_user_override,
        get_user_service: get_user_service_override,
    }

    client.user_service_mock = user_service_mock
    return client

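# A teardown-friendly variant of the fixture above (a sketch, not used by the
# tests in this file): assigning a brand-new dict to dependency_overrides
# discards overrides installed elsewhere and never restores the real
# dependencies, whereas yielding lets pytest clean up even if a test fails.
@pytest.fixture
def client_with_teardown(fake_user_admin):
    user_service_mock = MagicMock(spec=UserService)
    app.dependency_overrides[get_admin_user] = lambda: fake_user_admin
    app.dependency_overrides[get_user_service] = lambda: user_service_mock
    test_client = TestClient(app)
    test_client.user_service_mock = user_service_mock
    yield test_client
    app.dependency_overrides.clear()  # restore the real dependencies
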
# -----------------------
# Tests
# -----------------------

class TestListUsers:

    def test_i_can_list_users(self, client, fake_user_admin, fake_user_response):
        client.user_service_mock.list_users.return_value = [fake_user_admin, fake_user_response]
        response = client.get("/users")
        assert response.status_code == status.HTTP_200_OK
        data = response.json()
        assert len(data) == 2
        assert data[0]["username"] == "admin"

    def test_i_can_list_users_when_empty(self, client):
        client.user_service_mock.list_users.return_value = []
        response = client.get("/users")
        assert response.status_code == status.HTTP_200_OK
        assert response.json() == []


class TestGetUserById:

    def test_i_can_get_user_by_id(self, client, fake_user_response):
        client.user_service_mock.get_user_by_id.return_value = fake_user_response
        response = client.get(f"/users/{fake_user_response.id}")
        assert response.status_code == status.HTTP_200_OK
        data = response.json()
        assert data["username"] == fake_user_response.username

    def test_i_cannot_get_user_by_id_not_found(self, client):
        client.user_service_mock.get_user_by_id.return_value = None
        response = client.get("/users/64f0c9f4b0d1c8b7b8e1f0a2")
        assert response.status_code == status.HTTP_404_NOT_FOUND
        assert response.json()["detail"] == "User not found"


class TestCreateUser:

    def test_i_can_create_user(self, client, fake_user_response):
        user_data = UserCreate(username="newuser",
                               email="new@example.com",
                               password="#Passw0rd!",
                               role=UserRole.USER)

        client.user_service_mock.create_user.return_value = fake_user_response
        response = client.post("/users", json=user_data.model_dump(mode="json"))
        assert response.status_code == status.HTTP_201_CREATED
        data = response.json()
        assert data["username"] == fake_user_response.username

    def test_i_cannot_create_user_when_service_raises_value_error(self, client):
        user_data = {"username": "baduser", "email": "bad@example.com", "role": "user", "password": "password"}
        client.user_service_mock.create_user.side_effect = ValueError("Invalid data")
        response = client.post("/users", json=user_data)
        assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY


class TestUpdateUser:

    def test_i_can_update_user(self, client, fake_user_response):
        user_data = {"username": "updateduser", "email": "updated@example.com"}
        client.user_service_mock.update_user.return_value = fake_user_response
        response = client.put(f"/users/{fake_user_response.id}", json=user_data)
        assert response.status_code == status.HTTP_200_OK
        data = response.json()
        assert data["username"] == fake_user_response.username

    def test_i_cannot_update_user_not_found(self, client):
        client.user_service_mock.update_user.return_value = None
        user_data = {"username": "updateduser"}
        response = client.put("/users/64f0c9f4b0d1c8b7b8e1f0a2", json=user_data)
        assert response.status_code == status.HTTP_404_NOT_FOUND
        assert response.json()["detail"] == "User not found"

    def test_i_cannot_update_user_when_service_raises_value_error(self, client):
        client.user_service_mock.update_user.side_effect = ValueError("Invalid update")
        user_data = {"username": "badupdate"}
        response = client.put("/users/64f0c9f4b0d1c8b7b8e1f0a2", json=user_data)
        assert response.status_code == status.HTTP_400_BAD_REQUEST
        assert response.json()["detail"] == "Invalid update"


class TestDeleteUser:

    def test_i_can_delete_user(self, client):
        client.user_service_mock.delete_user.return_value = True
        response = client.delete("/users/64f0c9f4b0d1c8b7b8e1f0a1")
        assert response.status_code == status.HTTP_200_OK
        data = response.json()
        assert data["message"] == "User successfully deleted"

    def test_i_cannot_delete_user_not_found(self, client):
        client.user_service_mock.delete_user.return_value = False
        response = client.delete("/users/64f0c9f4b0d1c8b7b8e1f0a2")
        assert response.status_code == status.HTTP_404_NOT_FOUND
        assert response.json()["detail"] == "User not found"
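Taken together, TestCreateUser through TestDeleteUser pin down the router's error contract: a service returning None maps to 404 with detail "User not found", a ValueError raised during an update surfaces as 400 with the exception text, and an invalid request body is rejected with 422 before the service is reached. A hypothetical handler consistent with that contract (the router object and exact signature here are assumptions, not the project's actual code):

from fastapi import APIRouter, Depends, HTTPException, status

from app.api.dependencies import get_user_service
from app.models.user import UserUpdate

router = APIRouter(prefix="/users")

@router.put("/{user_id}")
def update_user(user_id: str, payload: UserUpdate, service=Depends(get_user_service)):
    try:
        user = service.update_user(user_id, payload)
    except ValueError as exc:
        # The tests assert the service's message is echoed back as "detail".
        raise HTTPException(status.HTTP_400_BAD_REQUEST, detail=str(exc))
    if user is None:
        raise HTTPException(status.HTTP_404_NOT_FOUND, detail="User not found")
    return user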
0 tests/common/__init__.py Normal file
52 tests/common/test_utils.py Normal file
@@ -0,0 +1,52 @@
import shutil
import tempfile
from pathlib import Path

import pytest

from tasks.common.converter_utils import detect_file_type, UnsupportedFileTypeError


@pytest.fixture
def temp_dir():
    """Create a temporary directory for generated test files."""
    dir_path = tempfile.mkdtemp()
    yield dir_path
    shutil.rmtree(dir_path)


def test_i_can_detect_text_file(temp_dir):
    txt_file = Path(temp_dir) / "sample.txt"
    txt_file.write_text("Sample text content")
    detected_type = detect_file_type(str(txt_file))
    assert detected_type == "text"


def test_i_can_detect_image_file(temp_dir):
    from PIL import Image

    img_file = Path(temp_dir) / "sample.jpg"
    image = Image.new("RGB", (50, 50), color="blue")
    image.save(img_file)

    detected_type = detect_file_type(str(img_file))
    assert detected_type == "image"


def test_i_can_detect_word_file(temp_dir):
    import docx

    docx_file = Path(temp_dir) / "sample.docx"
    doc = docx.Document()
    doc.add_paragraph("Sample content")
    doc.save(docx_file)

    detected_type = detect_file_type(str(docx_file))
    assert detected_type == "word"


def test_i_cannot_detect_unsupported_file(temp_dir):
    exe_file = Path(temp_dir) / "sample.exe"
    exe_file.write_bytes(b'\x4D\x5A\x90\x00\x03\x00\x00\x00')
    with pytest.raises(UnsupportedFileTypeError):
        detect_file_type(str(exe_file))
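For orientation, a detector consistent with these four tests could be little more than a MIME-type lookup; this is a hypothetical sketch, not the actual tasks.common.converter_utils implementation:

import mimetypes

def detect_file_type(path: str) -> str:
    mime, _ = mimetypes.guess_type(path)
    if mime == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        return "word"
    if mime is not None and mime.startswith("text/"):
        return "text"
    if mime is not None and mime.startswith("image/"):
        return "image"
    # Anything unrecognised (e.g. the .exe sample) is rejected.
    raise UnsupportedFileTypeError(f"Unsupported file type: {path}")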
0 tests/database/__init__.py Normal file
0 tests/models/__init__.py Normal file
@@ -10,8 +10,8 @@ from pydantic import ValidationError
 from datetime import datetime
 from bson import ObjectId

-from app.models.user import UserCreate, UserUpdate, UserInDB, UserResponse
-from app.models.auth import UserRole
+from app.models.user import UserCreate, UserUpdate, UserInDB
+from app.models.auth import UserRole, UserResponse


 class TestUserCreateModel:
@@ -349,7 +349,7 @@ class TestUserResponseModel:

         # Convert to response model (excluding password_hash)
         user_response = UserResponse(
-            id=user_in_db.id,
+            _id=user_in_db.id,
             username=user_in_db.username,
             email=user_in_db.email,
             role=user_in_db.role,
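The `id=` to `_id=` change in the second hunk suggests UserResponse (now exported from app.models.auth) populates its ID through the Mongo-style `_id` field alias. In Pydantic v2 that pattern usually looks like the following sketch (the field list beyond the alias is an assumption, not the project's actual definition):

from pydantic import BaseModel, ConfigDict, Field

class UserResponse(BaseModel):
    model_config = ConfigDict(populate_by_name=True)

    # Stored and serialized as Mongo's "_id"; accessed as .id in Python.
    id: str = Field(alias="_id")
    username: str
    email: str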
0 tests/repositories/__init__.py Normal file
611 tests/repositories/test_document_repository.py Normal file
@@ -0,0 +1,611 @@
"""
Test suite for FileDocumentRepository.

This module contains comprehensive tests for all FileDocumentRepository methods
using mongomock for in-memory MongoDB testing.
"""

from datetime import datetime

import pytest
from bson import ObjectId
from mongomock.mongo_client import MongoClient
from pymongo.errors import PyMongoError

from app.database.repositories.document_repository import (
    FileDocumentRepository,
    MatchMethodBase,
    SubsequenceMatching,
    FuzzyMatching
)
from app.models.document import FileDocument, FileType, ExtractionMethod


@pytest.fixture
def in_memory_repository():
    """Create an in-memory FileDocumentRepository for testing."""
    client = MongoClient()
    db = client.test_database
    repo = FileDocumentRepository(db)
    repo.initialize()
    return repo


@pytest.fixture
def sample_file_document():
    """Sample FileDocument data for testing."""
    return FileDocument(
        filename="sample_document.pdf",
        filepath="/home/user/documents/sample_document.pdf",
        file_type=FileType.PDF,
        extraction_method=ExtractionMethod.OCR,
        metadata={"pages": 5, "language": "en", "author": "John Doe"},
        detected_at=datetime.now(),
        file_hash="a1b2c3d4e5f6789012345678901234567890abcdef1234567890abcdef123456",
        encoding="utf-8",
        file_size=1024000,
        mime_type="application/pdf"
    )


@pytest.fixture
def sample_update_data():
    """Sample update data for testing."""
    return {
        "extraction_method": ExtractionMethod.HYBRID,
        "metadata": {"pages": 10, "language": "fr", "updated": True},
        "file_size": 2048000
    }


@pytest.fixture
def multiple_sample_files():
    """Multiple FileDocument objects for list/search testing."""
    base_time = datetime.now()
    return [
        FileDocument(
            filename="first_doc.txt",
            filepath="/docs/first_doc.txt",
            file_type=FileType.TXT,
            extraction_method=ExtractionMethod.DIRECT_TEXT,
            metadata={"words": 500},
            detected_at=base_time,
            file_hash="hash1" + "0" * 58,
            encoding="utf-8",
            file_size=5000,
            mime_type="text/plain"
        ),
        FileDocument(
            filename="second_document.pdf",
            filepath="/docs/second_document.pdf",
            file_type=FileType.PDF,
            extraction_method=ExtractionMethod.OCR,
            metadata={"pages": 8},
            detected_at=base_time,
            file_hash="hash2" + "0" * 58,
            encoding="utf-8",
            file_size=10000,
            mime_type="application/pdf"
        ),
        FileDocument(
            filename="third_file.docx",
            filepath="/docs/third_file.docx",
            file_type=FileType.DOCX,
            extraction_method=ExtractionMethod.HYBRID,
            metadata={"paragraphs": 15},
            detected_at=base_time,
            file_hash="hash3" + "0" * 58,
            encoding="utf-8",
            file_size=15000,
            mime_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
        )
    ]


class TestFileDocumentRepositoryInitialization:
    """Tests for repository initialization."""

    def test_i_can_initialize_repository(self):
        """Test repository initialization."""
        # Arrange
        client = MongoClient()
        db = client.test_database
        repo = FileDocumentRepository(db)
        repo.initialize()

        # Act & Assert (should not raise any exception)
        assert repo.db is not None
        assert repo.collection is not None
        # TODO: check that the indexes are created


class TestFileDocumentRepositoryCreation:
    """Tests for file document creation functionality."""

    def test_i_can_create_file_document(self, in_memory_repository, sample_file_document):
        """Test successful file document creation."""
        # Act
        created_file = in_memory_repository.create_document(sample_file_document)

        # Assert
        assert created_file is not None
        assert created_file.filename == sample_file_document.filename
        assert created_file.filepath == sample_file_document.filepath
        assert created_file.file_type == sample_file_document.file_type
        assert created_file.extraction_method == sample_file_document.extraction_method
        assert created_file.metadata == sample_file_document.metadata
        assert created_file.file_hash == sample_file_document.file_hash
        assert created_file.file_size == sample_file_document.file_size
        assert created_file.mime_type == sample_file_document.mime_type
        assert created_file.id is not None
        assert isinstance(created_file.id, ObjectId)

    def test_i_can_create_file_document_without_id(self, in_memory_repository, sample_file_document):
        """Test creating file document with _id set to None (should be removed)."""
        # Arrange
        sample_file_document.id = None

        # Act
        created_file = in_memory_repository.create_document(sample_file_document)

        # Assert
        assert created_file is not None
        assert created_file.id is not None
        assert isinstance(created_file.id, ObjectId)

    def test_i_cannot_create_file_document_with_pymongo_error(self, in_memory_repository,
                                                              sample_file_document, mocker):
        """Test handling of PyMongo errors during file document creation."""
        # Arrange
        mocker.patch.object(in_memory_repository.collection, 'insert_one', side_effect=PyMongoError("Database error"))

        # Act & Assert
        with pytest.raises(ValueError) as exc_info:
            in_memory_repository.create_document(sample_file_document)

        assert "Failed to create file document" in str(exc_info.value)


class TestFileDocumentRepositoryFinding:
    """Tests for file document finding functionality."""

    def test_i_can_find_document_by_valid_id(self, in_memory_repository, sample_file_document):
        """Test finding file document by valid ObjectId."""
        # Arrange
        created_file = in_memory_repository.create_document(sample_file_document)

        # Act
        found_file = in_memory_repository.find_document_by_id(str(created_file.id))

        # Assert
        assert found_file is not None
        assert found_file.id == created_file.id
        assert found_file.filename == created_file.filename
        assert found_file.filepath == created_file.filepath

    def test_i_cannot_find_document_with_invalid_id(self, in_memory_repository):
        """Test that invalid ObjectId returns None."""
        # Act
        found_file = in_memory_repository.find_document_by_id("invalid_id")

        # Assert
        assert found_file is None

    def test_i_cannot_find_document_by_nonexistent_id(self, in_memory_repository):
        """Test that nonexistent but valid ObjectId returns None."""
        # Arrange
        nonexistent_id = str(ObjectId())

        # Act
        found_file = in_memory_repository.find_document_by_id(nonexistent_id)

        # Assert
        assert found_file is None

    def test_i_can_find_document_by_file_hash(self, in_memory_repository, sample_file_document):
        """Test finding file document by file hash."""
        # Arrange
        created_file = in_memory_repository.create_document(sample_file_document)

        # Act
        found_file = in_memory_repository.find_document_by_hash(sample_file_document.file_hash)

        # Assert
        assert found_file is not None
        assert found_file.file_hash == created_file.file_hash
        assert found_file.id == created_file.id

    def test_i_cannot_find_document_with_nonexistent_file_hash(self, in_memory_repository):
        """Test that nonexistent file hash returns None."""
        # Act
        found_file = in_memory_repository.find_document_by_hash("nonexistent_hash")

        # Assert
        assert found_file is None

    def test_i_can_find_document_by_filepath(self, in_memory_repository, sample_file_document):
        """Test finding file document by filepath."""
        # Arrange
        created_file = in_memory_repository.create_document(sample_file_document)

        # Act
        found_file = in_memory_repository.find_document_by_filepath(sample_file_document.filepath)

        # Assert
        assert found_file is not None
        assert found_file.filepath == created_file.filepath
        assert found_file.id == created_file.id

    def test_i_cannot_find_document_with_nonexistent_filepath(self, in_memory_repository):
        """Test that nonexistent filepath returns None."""
        # Act
        found_file = in_memory_repository.find_document_by_filepath("/nonexistent/path/file.pdf")

        # Assert
        assert found_file is None

    def test_i_cannot_find_document_with_pymongo_error(self, in_memory_repository, mocker):
        """Test handling of PyMongo errors during file document finding."""
        # Arrange
        mocker.patch.object(in_memory_repository.collection, 'find_one', side_effect=PyMongoError("Database error"))

        # Act
        found_file = in_memory_repository.find_document_by_hash("test_hash")

        # Assert
        assert found_file is None


class TestFileDocumentRepositoryNameMatching:
    """Tests for file document name matching functionality."""

    def test_i_can_find_documents_by_name_with_fuzzy_matching(self, in_memory_repository, multiple_sample_files):
        """Test finding file documents by filename using fuzzy matching."""
        # Arrange
        for file_doc in multiple_sample_files:
            in_memory_repository.create_document(file_doc)

        # Act
        fuzzy_method = FuzzyMatching(threshold=0.5)
        found_files = in_memory_repository.find_document_by_name("document", fuzzy_method)

        # Assert
        assert len(found_files) >= 1
        assert all(isinstance(file_doc, FileDocument) for file_doc in found_files)
        # Should find files with "document" in the name
        found_filenames = [f.filename for f in found_files]
        assert any("document" in fname.lower() for fname in found_filenames)

    def test_i_can_find_documents_by_name_with_subsequence_matching(self, in_memory_repository,
                                                                    multiple_sample_files):
        """Test finding file documents by filename using subsequence matching."""
        # Arrange
        for file_doc in multiple_sample_files:
            in_memory_repository.create_document(file_doc)

        # Act
        subsequence_method = SubsequenceMatching()
        found_files = in_memory_repository.find_document_by_name("doc", subsequence_method)

        # Assert
        assert len(found_files) >= 1
        assert all(isinstance(file_doc, FileDocument) for file_doc in found_files)

    def test_i_can_find_documents_by_name_with_default_method(self, in_memory_repository, multiple_sample_files):
        """Test finding file documents by filename with default matching method."""
        # Arrange
        for file_doc in multiple_sample_files:
            in_memory_repository.create_document(file_doc)

        # Act
        found_files = in_memory_repository.find_document_by_name("first")

        # Assert
        assert len(found_files) >= 0
        assert all(isinstance(file_doc, FileDocument) for file_doc in found_files)

    def test_i_cannot_find_documents_by_name_with_pymongo_error(self, in_memory_repository, mocker):
        """Test handling of PyMongo errors during document name matching."""
        # Arrange
        mocker.patch.object(in_memory_repository.collection, 'find', side_effect=PyMongoError("Database error"))

        # Act
        found_files = in_memory_repository.find_document_by_name("test")

        # Assert
        assert found_files == []


class TestFileDocumentRepositoryListing:
    """Tests for file document listing functionality."""

    def test_i_can_list_documents_with_default_pagination(self, in_memory_repository, multiple_sample_files):
        """Test listing file documents with default pagination."""
        # Arrange
        for file_doc in multiple_sample_files:
            in_memory_repository.create_document(file_doc)

        # Act
        files = in_memory_repository.list_documents()

        # Assert
        assert len(files) == len(multiple_sample_files)
        assert all(isinstance(file_doc, FileDocument) for file_doc in files)

    def test_i_can_list_documents_with_custom_pagination(self, in_memory_repository, multiple_sample_files):
        """Test listing file documents with custom pagination."""
        # Arrange
        for file_doc in multiple_sample_files:
            in_memory_repository.create_document(file_doc)

        # Act
        files_page1 = in_memory_repository.list_documents(skip=0, limit=2)
        files_page2 = in_memory_repository.list_documents(skip=2, limit=2)

        # Assert
        assert len(files_page1) == 2
        assert len(files_page2) == 1  # Only 3 total files

        # Ensure no overlap between pages
        page1_ids = [file_doc.id for file_doc in files_page1]
        page2_ids = [file_doc.id for file_doc in files_page2]
        assert len(set(page1_ids).intersection(set(page2_ids))) == 0

    def test_i_can_list_documents_sorted_by_detected_at(self, in_memory_repository, sample_file_document):
        """Test that file documents are sorted by detected_at in descending order."""
        # Arrange
        file1 = sample_file_document.model_copy()
        file1.filepath = "/docs/file1.pdf"
        file1.filename = "file1.pdf"
        file1.file_hash = "hash1" + "0" * 58
        file1.detected_at = datetime(2024, 1, 1, 10, 0, 0)

        file2 = sample_file_document.model_copy()
        file2.filepath = "/docs/file2.pdf"
        file2.filename = "file2.pdf"
        file2.file_hash = "hash2" + "0" * 58
        file2.detected_at = datetime(2024, 1, 2, 10, 0, 0)  # Later date

        created_file1 = in_memory_repository.create_document(file1)
        created_file2 = in_memory_repository.create_document(file2)

        # Act
        files = in_memory_repository.list_documents()

        # Assert
        assert len(files) == 2
        # Most recent (latest detected_at) should be first
        assert files[0].id == created_file2.id
        assert files[1].id == created_file1.id

    def test_i_can_list_empty_documents(self, in_memory_repository):
        """Test listing file documents from empty collection."""
        # Act
        files = in_memory_repository.list_documents()

        # Assert
        assert files == []

    def test_i_cannot_list_documents_with_pymongo_error(self, in_memory_repository, mocker):
        """Test handling of PyMongo errors during file document listing."""
        # Arrange
        mocker.patch.object(in_memory_repository.collection, 'find', side_effect=PyMongoError("Database error"))

        # Act
        files = in_memory_repository.list_documents()

        # Assert
        assert files == []

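def _list_documents_sketch(collection, skip: int = 0, limit: int = 100):
    """Hypothetical sketch of the list_documents contract pinned down by the
    tests above: newest detected_at first, paginated with skip/limit, and an
    empty list on PyMongoError. The real method lives on
    FileDocumentRepository and may differ."""
    try:
        cursor = (collection.find()
                  .sort("detected_at", -1)  # -1 = descending, newest first
                  .skip(skip)
                  .limit(limit))
        return [FileDocument(**doc) for doc in cursor]
    except PyMongoError:
        return []
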
class TestFileDocumentRepositoryUpdate:
    """Tests for file document update functionality."""

    def test_i_can_update_document_successfully(self, in_memory_repository, sample_file_document,
                                                sample_update_data):
        """Test successful file document update."""
        # Arrange
        created_file = in_memory_repository.create_document(sample_file_document)

        # Act
        updated_file = in_memory_repository.update_document(str(created_file.id), sample_update_data)

        # Assert
        assert updated_file is not None
        assert updated_file.extraction_method == sample_update_data["extraction_method"]
        assert updated_file.metadata == sample_update_data["metadata"]
        assert updated_file.file_size == sample_update_data["file_size"]
        assert updated_file.id == created_file.id
        assert updated_file.filename == created_file.filename  # Unchanged fields remain
        assert updated_file.filepath == created_file.filepath

    def test_i_can_update_document_with_partial_data(self, in_memory_repository, sample_file_document):
        """Test updating file document with partial data."""
        # Arrange
        created_file = in_memory_repository.create_document(sample_file_document)
        partial_update = {"file_size": 999999}

        # Act
        updated_file = in_memory_repository.update_document(str(created_file.id), partial_update)

        # Assert
        assert updated_file is not None
        assert updated_file.file_size == 999999
        assert updated_file.filename == created_file.filename  # Should remain unchanged
        assert updated_file.metadata == created_file.metadata  # Should remain unchanged

    def test_i_can_update_document_filtering_none_values(self, in_memory_repository, sample_file_document):
        """Test that None values are filtered out from update data."""
        # Arrange
        created_file = in_memory_repository.create_document(sample_file_document)
        update_with_none = {"file_size": 777777, "metadata": None}

        # Act
        updated_file = in_memory_repository.update_document(str(created_file.id), update_with_none)

        # Assert
        assert updated_file is not None
        assert updated_file.file_size == 777777
        assert updated_file.metadata == created_file.metadata  # Should remain unchanged (None filtered out)

    def test_i_can_update_document_with_empty_data(self, in_memory_repository, sample_file_document):
        """Test updating file document with empty data returns current document."""
        # Arrange
        created_file = in_memory_repository.create_document(sample_file_document)
        empty_update = {}

        # Act
        result = in_memory_repository.update_document(str(created_file.id), empty_update)

        # Assert
        assert result is not None
        assert result.filename == created_file.filename
        assert result.filepath == created_file.filepath
        assert result.metadata == created_file.metadata

    def test_i_cannot_update_document_with_invalid_id(self, in_memory_repository, sample_update_data):
        """Test that updating with invalid ID returns None."""
        # Act
        result = in_memory_repository.update_document("invalid_id", sample_update_data)

        # Assert
        assert result is None

    def test_i_cannot_update_nonexistent_document(self, in_memory_repository, sample_update_data):
        """Test that updating nonexistent file document returns None."""
        # Arrange
        nonexistent_id = str(ObjectId())

        # Act
        result = in_memory_repository.update_document(nonexistent_id, sample_update_data)

        # Assert
        assert result is None

    def test_i_cannot_update_document_with_pymongo_error(self, in_memory_repository, sample_file_document,
                                                         sample_update_data, mocker):
        """Test handling of PyMongo errors during file document update."""
        # Arrange
        created_file = in_memory_repository.create_document(sample_file_document)
        mocker.patch.object(in_memory_repository.collection, 'find_one_and_update',
                            side_effect=PyMongoError("Database error"))

        # Act
        result = in_memory_repository.update_document(str(created_file.id), sample_update_data)

        # Assert
        assert result is None


class TestFileDocumentRepositoryDeletion:
    """Tests for file document deletion functionality."""

    def test_i_can_delete_existing_document(self, in_memory_repository, sample_file_document):
        """Test successful file document deletion."""
        # Arrange
        created_file = in_memory_repository.create_document(sample_file_document)

        # Act
        deletion_result = in_memory_repository.delete_document(str(created_file.id))

        # Assert
        assert deletion_result is True

        # Verify document is actually deleted
        found_file = in_memory_repository.find_document_by_id(str(created_file.id))
        assert found_file is None

    def test_i_cannot_delete_document_with_invalid_id(self, in_memory_repository):
        """Test that deleting with invalid ID returns False."""
        # Act
        result = in_memory_repository.delete_document("invalid_id")

        # Assert
        assert result is False

    def test_i_cannot_delete_nonexistent_document(self, in_memory_repository):
        """Test that deleting nonexistent file document returns False."""
        # Arrange
        nonexistent_id = str(ObjectId())

        # Act
        result = in_memory_repository.delete_document(nonexistent_id)

        # Assert
        assert result is False

    def test_i_cannot_delete_document_with_pymongo_error(self, in_memory_repository, sample_file_document, mocker):
        """Test handling of PyMongo errors during file document deletion."""
        # Arrange
        created_file = in_memory_repository.create_document(sample_file_document)
        mocker.patch.object(in_memory_repository.collection, 'delete_one', side_effect=PyMongoError("Database error"))

        # Act
        result = in_memory_repository.delete_document(str(created_file.id))

        # Assert
        assert result is False


class TestFileDocumentRepositoryUtilities:
    """Tests for utility methods."""

    def test_i_can_count_documents(self, in_memory_repository, sample_file_document):
        """Test counting file documents."""
        # Arrange
        initial_count = in_memory_repository.count_documents()
        in_memory_repository.create_document(sample_file_document)

        # Act
        final_count = in_memory_repository.count_documents()

        # Assert
        assert final_count == initial_count + 1

    def test_i_can_count_zero_documents(self, in_memory_repository):
        """Test counting file documents in empty collection."""
        # Act
        count = in_memory_repository.count_documents()

        # Assert
        assert count == 0

    def test_i_cannot_count_documents_with_pymongo_error(self, in_memory_repository, mocker):
        """Test handling of PyMongo errors during file document counting."""
        # Arrange
        mocker.patch.object(in_memory_repository.collection, 'count_documents',
                            side_effect=PyMongoError("Database error"))

        # Act
        count = in_memory_repository.count_documents()

        # Assert
        assert count == 0


class TestMatchingMethods:
    """Tests for matching method classes."""

    def test_i_can_create_fuzzy_matching_with_default_threshold(self):
        """Test creating FuzzyMatching with default threshold."""
        # Act
        fuzzy = FuzzyMatching()

        # Assert
        assert fuzzy.threshold == 0.6

    def test_i_can_create_fuzzy_matching_with_custom_threshold(self):
        """Test creating FuzzyMatching with custom threshold."""
        # Act
        fuzzy = FuzzyMatching(threshold=0.8)

        # Assert
        assert fuzzy.threshold == 0.8

    def test_i_can_create_subsequence_matching(self):
        """Test creating SubsequenceMatching."""
        # Act
        subsequence = SubsequenceMatching()

        # Assert
        assert isinstance(subsequence, MatchMethodBase)
        assert isinstance(subsequence, SubsequenceMatching)
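One detail worth noting from TestMatchingMethods: the default threshold of 0.6 is the similarity-ratio convention used by Python's difflib, so FuzzyMatching plausibly wraps SequenceMatcher. A hypothetical sketch of such a class (the `matches` method name is an assumption; the real class lives in document_repository):

from difflib import SequenceMatcher

class FuzzyMatching(MatchMethodBase):
    def __init__(self, threshold: float = 0.6):
        self.threshold = threshold

    def matches(self, query: str, filename: str) -> bool:
        # ratio() returns a similarity score in [0, 1]; 1.0 means identical.
        score = SequenceMatcher(None, query.lower(), filename.lower()).ratio()
        return score >= self.threshold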
496 tests/repositories/test_job_repository.py Normal file
@@ -0,0 +1,496 @@
"""
Test suite for JobRepository.

This module contains comprehensive tests for all JobRepository methods
using mongomock and mongomock-motor for in-memory MongoDB testing.
"""

from datetime import datetime

import pytest
from bson import ObjectId
from mongomock.mongo_client import MongoClient
from mongomock_motor import AsyncMongoMockClient
from pymongo.errors import PyMongoError

from app.database.repositories.job_repository import JobRepository
from app.exceptions.job_exceptions import JobRepositoryError
from app.models.job import ProcessingJob, ProcessingStatus
from app.models.types import PyObjectId


@pytest.fixture
def in_memory_repository():
    """Create an in-memory JobRepository for testing."""
    client = MongoClient()
    db = client.test_database
    repo = JobRepository(db)
    repo.initialize()
    return repo


@pytest.fixture
def sample_document_id():
    """Sample document ObjectId for testing."""
    return PyObjectId()


@pytest.fixture
def sample_task_id():
    """Sample Celery task ID for testing."""
    return "celery-task-12345-abcde"


@pytest.fixture
def multiple_sample_jobs():
    """Multiple ProcessingJob objects for testing."""
    doc_id_1 = ObjectId()
    doc_id_2 = ObjectId()
    base_time = datetime.utcnow()

    return [
        ProcessingJob(
            document_id=doc_id_1,
            status=ProcessingStatus.PENDING,
            task_id="task-1",
            created_at=base_time,
            started_at=None,
            completed_at=None,
            error_message=None
        ),
        ProcessingJob(
            document_id=doc_id_2,
            status=ProcessingStatus.PROCESSING,
            task_id="task-2",
            created_at=base_time,
            started_at=base_time,
            completed_at=None,
            error_message=None
        ),
        ProcessingJob(
            document_id=doc_id_1,
            status=ProcessingStatus.COMPLETED,
            task_id="task-3",
            created_at=base_time,
            started_at=base_time,
            completed_at=base_time,
            error_message=None
        )
    ]

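# One caveat in the fixture above: datetime.utcnow() is deprecated as of
# Python 3.12 in favor of timezone-aware timestamps. If ProcessingJob accepts
# aware datetimes, the drop-in replacement is:
#
#     from datetime import timezone
#     base_time = datetime.now(timezone.utc)
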
class TestJobRepositoryInitialization:
|
||||||
|
"""Tests for repository initialization."""
|
||||||
|
|
||||||
|
def test_i_can_initialize_repository(self):
|
||||||
|
"""Test repository initialization."""
|
||||||
|
# Arrange
|
||||||
|
client = AsyncMongoMockClient()
|
||||||
|
db = client.test_database
|
||||||
|
repo = JobRepository(db)
|
||||||
|
|
||||||
|
# Act
|
||||||
|
initialized_repo = repo.initialize()
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert initialized_repo is repo
|
||||||
|
assert repo.db is not None
|
||||||
|
assert repo.collection is not None
|
||||||
|
|
||||||
|
|
||||||
|
class TestJobRepositoryCreation:
|
||||||
|
"""Tests for job creation functionality."""
|
||||||
|
|
||||||
|
def test_i_can_create_job_with_task_id(self, in_memory_repository, sample_document_id, sample_task_id):
|
||||||
|
"""Test successful job creation with task ID."""
|
||||||
|
# Act
|
||||||
|
created_job = in_memory_repository.create_job(sample_document_id, sample_task_id)
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert created_job is not None
|
||||||
|
assert created_job.document_id == sample_document_id
|
||||||
|
assert created_job.task_id == sample_task_id
|
||||||
|
assert created_job.status == ProcessingStatus.PENDING
|
||||||
|
assert created_job.created_at is not None
|
||||||
|
assert created_job.started_at is None
|
||||||
|
assert created_job.completed_at is None
|
||||||
|
assert created_job.error_message is None
|
||||||
|
assert created_job.id is not None
|
||||||
|
assert isinstance(created_job.id, ObjectId)
|
||||||
|
|
||||||
|
def test_i_can_create_job_without_task_id(self, in_memory_repository, sample_document_id):
|
||||||
|
"""Test successful job creation without task ID."""
|
||||||
|
# Act
|
||||||
|
created_job = in_memory_repository.create_job(sample_document_id)
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert created_job is not None
|
||||||
|
assert created_job.document_id == sample_document_id
|
||||||
|
assert created_job.task_id is None
|
||||||
|
assert created_job.status == ProcessingStatus.PENDING
|
||||||
|
assert created_job.created_at is not None
|
||||||
|
assert created_job.started_at is None
|
||||||
|
assert created_job.completed_at is None
|
||||||
|
assert created_job.error_message is None
|
||||||
|
assert created_job.id is not None
|
||||||
|
assert isinstance(created_job.id, ObjectId)
|
||||||
|
|
||||||
|
def test_i_cannot_create_duplicate_job_for_document(self, in_memory_repository, sample_document_id,
|
||||||
|
sample_task_id):
|
||||||
|
"""Test that creating job with duplicate document_id raises DuplicateKeyError."""
|
||||||
|
# Arrange
|
||||||
|
in_memory_repository.create_job(sample_document_id, sample_task_id)
|
||||||
|
|
||||||
|
# Act & Assert
|
||||||
|
with pytest.raises(JobRepositoryError) as exc_info:
|
||||||
|
in_memory_repository.create_job(sample_document_id, "different-task-id")
|
||||||
|
|
||||||
|
assert "create_job" in str(exc_info.value)
|
||||||
|
|
||||||
|
def test_i_cannot_create_job_with_pymongo_error(self, in_memory_repository, sample_document_id, mocker):
|
||||||
|
"""Test handling of PyMongo errors during job creation."""
|
||||||
|
# Arrange
|
||||||
|
mocker.patch.object(in_memory_repository.collection, 'insert_one', side_effect=PyMongoError("Database error"))
|
||||||
|
|
||||||
|
# Act & Assert
|
||||||
|
with pytest.raises(JobRepositoryError) as exc_info:
|
||||||
|
in_memory_repository.create_job(sample_document_id)
|
||||||
|
|
||||||
|
assert "create_job" in str(exc_info.value)
|
||||||
|
|
||||||
|
|
||||||
|
class TestJobRepositoryFinding:
|
||||||
|
"""Tests for job finding functionality."""
|
||||||
|
|
||||||
|
def test_i_can_find_job_by_valid_id(self, in_memory_repository, sample_document_id, sample_task_id):
|
||||||
|
"""Test finding job by valid ObjectId."""
|
||||||
|
# Arrange
|
||||||
|
created_job = in_memory_repository.create_job(sample_document_id, sample_task_id)
|
||||||
|
|
||||||
|
# Act
|
||||||
|
found_job = in_memory_repository.find_job_by_id(created_job.id)
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert found_job is not None
|
||||||
|
assert found_job.id == created_job.id
|
||||||
|
assert found_job.document_id == created_job.document_id
|
||||||
|
assert found_job.task_id == created_job.task_id
|
||||||
|
assert found_job.status == created_job.status
|
||||||
|
|
||||||
|
def test_i_cannot_find_job_by_nonexistent_id(self, in_memory_repository):
|
||||||
|
"""Test that nonexistent ObjectId returns None."""
|
||||||
|
# Arrange
|
||||||
|
nonexistent_id = PyObjectId()
|
||||||
|
|
||||||
|
# Act
|
||||||
|
found_job = in_memory_repository.find_job_by_id(nonexistent_id)
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert found_job is None
|
||||||
|
|
||||||
|
def test_i_cannot_find_job_with_pymongo_error(self, in_memory_repository, mocker):
|
||||||
|
"""Test handling of PyMongo errors during job finding."""
|
||||||
|
# Arrange
|
||||||
|
mocker.patch.object(in_memory_repository.collection, 'find_one', side_effect=PyMongoError("Database error"))
|
||||||
|
|
||||||
|
# Act & Assert
|
||||||
|
with pytest.raises(JobRepositoryError) as exc_info:
|
||||||
|
in_memory_repository.find_job_by_id(PyObjectId())
|
||||||
|
|
||||||
|
assert "get_job_by_id" in str(exc_info.value)
|
||||||
|
|
||||||
|
def test_i_can_find_jobs_by_document_id(self, in_memory_repository, sample_document_id, sample_task_id):
|
||||||
|
"""Test finding jobs by document ID."""
|
||||||
|
# Arrange
|
||||||
|
created_job = in_memory_repository.create_job(sample_document_id, sample_task_id)
|
||||||
|
|
||||||
|
# Act
|
||||||
|
found_jobs = in_memory_repository.find_jobs_by_document_id(sample_document_id)
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert len(found_jobs) == 1
|
||||||
|
assert found_jobs[0].id == created_job.id
|
||||||
|
assert found_jobs[0].document_id == sample_document_id
|
||||||
|
|
||||||
|
def test_i_can_find_empty_jobs_list_for_nonexistent_document(self, in_memory_repository):
|
||||||
|
"""Test that nonexistent document ID returns empty list."""
|
||||||
|
# Arrange
|
||||||
|
nonexistent_id = ObjectId()
|
||||||
|
|
||||||
|
# Act
|
||||||
|
found_jobs = in_memory_repository.find_jobs_by_document_id(nonexistent_id)
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert found_jobs == []
|
||||||
|
|
||||||
|
def test_i_cannot_find_jobs_by_document_with_pymongo_error(self, in_memory_repository, mocker):
|
||||||
|
"""Test handling of PyMongo errors during finding jobs by document ID."""
|
||||||
|
# Arrange
|
||||||
|
mocker.patch.object(in_memory_repository.collection, 'find', side_effect=PyMongoError("Database error"))
|
||||||
|
|
||||||
|
# Act & Assert
|
||||||
|
with pytest.raises(JobRepositoryError) as exc_info:
|
||||||
|
in_memory_repository.find_jobs_by_document_id(PyObjectId())
|
||||||
|
|
||||||
|
assert "get_jobs_by_file_id" in str(exc_info.value)
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("status", [
|
||||||
|
ProcessingStatus.PENDING,
|
||||||
|
ProcessingStatus.PROCESSING,
|
||||||
|
ProcessingStatus.COMPLETED
|
||||||
|
])
|
||||||
|
def test_i_can_find_jobs_by_pending_status(self, in_memory_repository, sample_document_id, status):
|
||||||
|
"""Test finding jobs by PENDING status."""
|
||||||
|
# Arrange
|
||||||
|
created_job = in_memory_repository.create_job(sample_document_id)
|
||||||
|
in_memory_repository.update_job_status(created_job.id, status)
|
||||||
|
|
||||||
|
# Act
|
||||||
|
found_jobs = in_memory_repository.get_jobs_by_status(status)
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert len(found_jobs) == 1
|
||||||
|
assert found_jobs[0].id == created_job.id
|
||||||
|
assert found_jobs[0].status == status
|
||||||
|
|
||||||
|
def test_i_can_find_jobs_by_failed_status(self, in_memory_repository, sample_document_id):
|
||||||
|
"""Test finding jobs by FAILED status."""
|
||||||
|
# Arrange
|
||||||
|
created_job = in_memory_repository.create_job(sample_document_id)
|
||||||
|
in_memory_repository.update_job_status(created_job.id, ProcessingStatus.FAILED, "Test error")
|
||||||
|
|
||||||
|
# Act
|
||||||
|
found_jobs = in_memory_repository.get_jobs_by_status(ProcessingStatus.FAILED)
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert len(found_jobs) == 1
|
||||||
|
assert found_jobs[0].id == created_job.id
|
||||||
|
assert found_jobs[0].status == ProcessingStatus.FAILED
|
||||||
|
assert found_jobs[0].error_message == "Test error"
|
||||||
|
|
||||||
|
def test_i_can_find_empty_jobs_list_for_unused_status(self, in_memory_repository):
|
||||||
|
"""Test that unused status returns empty list."""
|
||||||
|
# Act
|
||||||
|
found_jobs = in_memory_repository.get_jobs_by_status(ProcessingStatus.COMPLETED)
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert found_jobs == []
|
||||||
|
|
||||||
|
def test_i_cannot_find_jobs_by_status_with_pymongo_error(self, in_memory_repository, mocker):
|
||||||
|
"""Test handling of PyMongo errors during finding jobs by status."""
|
||||||
|
# Arrange
|
||||||
|
mocker.patch.object(in_memory_repository.collection, 'find', side_effect=PyMongoError("Database error"))
|
||||||
|
|
||||||
|
# Act & Assert
|
||||||
|
with pytest.raises(JobRepositoryError) as exc_info:
|
||||||
|
in_memory_repository.get_jobs_by_status(ProcessingStatus.PENDING)
|
||||||
|
|
||||||
|
assert "get_jobs_by_status" in str(exc_info.value)
|
||||||
|
|
||||||
|
|
||||||
|
class TestJobRepositoryStatusUpdate:
|
||||||
|
"""Tests for job status update functionality."""
|
||||||
|
|
||||||
|
def test_i_can_update_job_status_to_processing(self, in_memory_repository, sample_document_id):
|
||||||
|
"""Test updating job status to PROCESSING with started_at timestamp."""
|
||||||
|
# Arrange
|
||||||
|
created_job = in_memory_repository.create_job(sample_document_id)
|
||||||
|
|
||||||
|
# Act
|
||||||
|
updated_job = in_memory_repository.update_job_status(created_job.id, ProcessingStatus.PROCESSING)
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert updated_job is not None
|
||||||
|
assert updated_job.id == created_job.id
|
||||||
|
assert updated_job.status == ProcessingStatus.PROCESSING
|
||||||
|
assert updated_job.started_at is not None
|
||||||
|
assert updated_job.completed_at is None
|
||||||
|
assert updated_job.error_message is None
|
||||||
|
|
||||||
|
def test_i_can_update_job_status_to_completed(self, in_memory_repository, sample_document_id):
|
||||||
|
"""Test updating job status to COMPLETED with completed_at timestamp."""
|
||||||
|
# Arrange
|
||||||
|
created_job = in_memory_repository.create_job(sample_document_id)
|
||||||
|
in_memory_repository.update_job_status(created_job.id, ProcessingStatus.PROCESSING)
|
||||||
|
|
||||||
|
# Act
|
||||||
|
updated_job = in_memory_repository.update_job_status(created_job.id, ProcessingStatus.COMPLETED)
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert updated_job is not None
|
||||||
|
assert updated_job.id == created_job.id
|
||||||
|
assert updated_job.status == ProcessingStatus.COMPLETED
|
||||||
|
assert updated_job.started_at is not None
|
||||||
|
assert updated_job.completed_at is not None
|
||||||
|
assert updated_job.error_message is None
|
||||||
|
|
||||||
|
def test_i_can_update_job_status_to_failed_with_error(self, in_memory_repository, sample_document_id):
|
||||||
|
"""Test updating job status to FAILED with error message and completed_at timestamp."""
|
||||||
|
# Arrange
|
||||||
|
created_job = in_memory_repository.create_job(sample_document_id)
|
||||||
|
error_message = "Processing failed due to invalid format"
|
||||||
|
|
||||||
|
# Act
|
||||||
|
updated_job = in_memory_repository.update_job_status(
|
||||||
|
created_job.id, ProcessingStatus.FAILED, error_message
|
||||||
|
)
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert updated_job is not None
|
||||||
|
assert updated_job.id == created_job.id
|
||||||
|
assert updated_job.status == ProcessingStatus.FAILED
|
||||||
|
assert updated_job.completed_at is not None
|
||||||
|
assert updated_job.error_message == error_message
|
||||||
|
|
||||||
|
def test_i_can_update_job_status_to_failed_without_error(self, in_memory_repository, sample_document_id):
|
||||||
|
"""Test updating job status to FAILED without error message."""
|
||||||
|
# Arrange
|
||||||
|
created_job = in_memory_repository.create_job(sample_document_id)
|
||||||
|
|
||||||
|
# Act
|
||||||
|
updated_job = in_memory_repository.update_job_status(created_job.id, ProcessingStatus.FAILED)
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert updated_job is not None
|
||||||
|
assert updated_job.id == created_job.id
|
||||||
|
assert updated_job.status == ProcessingStatus.FAILED
|
||||||
|
assert updated_job.completed_at is not None
|
||||||
|
assert updated_job.error_message is None
|
||||||
|
|
||||||
|
def test_i_cannot_update_nonexistent_job_status(self, in_memory_repository):
|
||||||
|
"""Test that updating nonexistent job returns None."""
|
||||||
|
# Arrange
|
||||||
|
nonexistent_id = ObjectId()
|
||||||
|
|
||||||
|
# Act
|
||||||
|
result = in_memory_repository.update_job_status(nonexistent_id, ProcessingStatus.COMPLETED)
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert result is None
|
||||||
|
|
||||||
|
def test_i_cannot_update_job_status_with_pymongo_error(self, in_memory_repository, sample_document_id, mocker):
|
||||||
|
"""Test handling of PyMongo errors during job status update."""
|
||||||
|
# Arrange
|
||||||
|
created_job = in_memory_repository.create_job(sample_document_id)
|
||||||
|
mocker.patch.object(in_memory_repository.collection, 'find_one_and_update',
|
||||||
|
side_effect=PyMongoError("Database error"))
|
||||||
|
|
||||||
|
# Act & Assert
|
||||||
|
with pytest.raises(JobRepositoryError) as exc_info:
|
||||||
|
in_memory_repository.update_job_status(created_job.id, ProcessingStatus.COMPLETED)
|
||||||
|
|
||||||
|
assert "update_job_status" in str(exc_info.value)
|
||||||
|
|
||||||
|
|
||||||
|
class TestJobRepositoryDeletion:
|
||||||
|
"""Tests for job deletion functionality."""
|
||||||
|
|
||||||
|
def test_i_can_delete_existing_job(self, in_memory_repository, sample_document_id):
|
||||||
|
"""Test successful job deletion."""
|
||||||
|
# Arrange
|
||||||
|
created_job = in_memory_repository.create_job(sample_document_id)
|
||||||
|
|
||||||
|
# Act
|
||||||
|
deletion_result = in_memory_repository.delete_job(created_job.id)
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert deletion_result is True
|
||||||
|
|
||||||
|
# Verify job is actually deleted
|
||||||
|
found_job = in_memory_repository.find_job_by_id(created_job.id)
|
||||||
|
assert found_job is None
|
||||||
|
|
||||||
|
def test_i_cannot_delete_nonexistent_job(self, in_memory_repository):
|
||||||
|
"""Test that deleting nonexistent job returns False."""
|
||||||
|
# Arrange
|
||||||
|
nonexistent_id = ObjectId()
|
||||||
|
|
||||||
|
# Act
|
||||||
|
result = in_memory_repository.delete_job(nonexistent_id)
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert result is False
|
||||||
|
|
||||||
|
def test_i_cannot_delete_job_with_pymongo_error(self, in_memory_repository, sample_document_id, mocker):
|
||||||
|
"""Test handling of PyMongo errors during job deletion."""
|
||||||
|
# Arrange
|
||||||
|
created_job = in_memory_repository.create_job(sample_document_id)
|
||||||
|
mocker.patch.object(in_memory_repository.collection, 'delete_one', side_effect=PyMongoError("Database error"))
|
||||||
|
|
||||||
|
# Act & Assert
|
||||||
|
with pytest.raises(JobRepositoryError) as exc_info:
|
||||||
|
in_memory_repository.delete_job(created_job.id)
|
||||||
|
|
||||||
|
assert "delete_job" in str(exc_info.value)
|
||||||
|
|
||||||
|
|
||||||
|
class TestJobRepositoryComplexScenarios:
    """Tests for complex job repository scenarios."""

    def test_i_can_handle_complete_job_lifecycle(self, in_memory_repository, sample_document_id, sample_task_id):
        """Test complete job lifecycle from creation to completion."""
        # Create job
        job = in_memory_repository.create_job(sample_document_id, sample_task_id)
        assert job.status == ProcessingStatus.PENDING
        assert job.started_at is None
        assert job.completed_at is None

        # Start processing
        job = in_memory_repository.update_job_status(job.id, ProcessingStatus.PROCESSING)
        assert job.status == ProcessingStatus.PROCESSING
        assert job.started_at is not None
        assert job.completed_at is None

        # Complete job
        job = in_memory_repository.update_job_status(job.id, ProcessingStatus.COMPLETED)
        assert job.status == ProcessingStatus.COMPLETED
        assert job.started_at is not None
        assert job.completed_at is not None
        assert job.error_message is None

    def test_i_can_handle_job_failure_scenario(self, in_memory_repository, sample_document_id, sample_task_id):
        """Test job failure scenario with error message."""
        # Create and start job
        job = in_memory_repository.create_job(sample_document_id, sample_task_id)
        job = in_memory_repository.update_job_status(job.id, ProcessingStatus.PROCESSING)

        # Fail job with error
        error_msg = "File format not supported"
        job = in_memory_repository.update_job_status(job.id, ProcessingStatus.FAILED, error_msg)

        # Assert failure state
        assert job.status == ProcessingStatus.FAILED
        assert job.started_at is not None
        assert job.completed_at is not None
        assert job.error_message == error_msg

    def test_i_can_handle_multiple_documents_with_different_statuses(self, in_memory_repository):
        """Test managing multiple jobs for different documents with various statuses."""
        # Create jobs for different documents
        doc1 = PyObjectId()
        doc2 = PyObjectId()
        doc3 = PyObjectId()

        job1 = in_memory_repository.create_job(doc1, "task-1")
        job2 = in_memory_repository.create_job(doc2, "task-2")
        job3 = in_memory_repository.create_job(doc3, "task-3")

        # Update to different statuses
        in_memory_repository.update_job_status(job1.id, ProcessingStatus.PROCESSING)
        in_memory_repository.update_job_status(job2.id, ProcessingStatus.COMPLETED)
        in_memory_repository.update_job_status(job3.id, ProcessingStatus.FAILED, "Error occurred")

        # Verify status queries
        pending_jobs = in_memory_repository.get_jobs_by_status(ProcessingStatus.PENDING)
        processing_jobs = in_memory_repository.get_jobs_by_status(ProcessingStatus.PROCESSING)
        completed_jobs = in_memory_repository.get_jobs_by_status(ProcessingStatus.COMPLETED)
        failed_jobs = in_memory_repository.get_jobs_by_status(ProcessingStatus.FAILED)

        assert len(pending_jobs) == 0
        assert len(processing_jobs) == 1
        assert len(completed_jobs) == 1
        assert len(failed_jobs) == 1

        assert processing_jobs[0].id == job1.id
        assert completed_jobs[0].id == job2.id
        assert failed_jobs[0].id == job3.id
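
The lifecycle and failure tests above fix the timestamp semantics of update_job_status: started_at is stamped when a job enters PROCESSING, completed_at when it reaches COMPLETED or FAILED, and error_message only on failure. A minimal sketch of an update payload that would satisfy those assertions (field names come from the tests; the real repository may build this differently, and the status values used for comparison here are assumptions):

    from datetime import datetime, timezone

    def build_status_update(status, error_message=None):
        """Assemble the MongoDB $set payload for a job status change (sketch)."""
        now = datetime.now(timezone.utc)
        fields = {"status": status}
        status_value = getattr(status, "value", status)   # accept enum or plain string
        if status_value == "processing":
            fields["started_at"] = now                    # stamped once, on start
        elif status_value in ("completed", "failed"):
            fields["completed_at"] = now                  # terminal timestamp
        if error_message is not None:
            fields["error_message"] = error_message       # FAILED path only
        return {"$set": fields}
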
@@ -1,29 +1,26 @@
 """
-Test suite for UserRepository with async/await support.
+Test suite for UserRepository with async/support.
 
 This module contains comprehensive tests for all UserRepository methods
 using mongomock-motor for in-memory MongoDB testing.
 """
 
 import pytest
-from datetime import datetime
-
-import pytest_asyncio
 from bson import ObjectId
+from mongomock.mongo_client import MongoClient
 from pymongo.errors import DuplicateKeyError
-from mongomock_motor import AsyncMongoMockClient
 
 from app.database.repositories.user_repository import UserRepository
-from app.models.user import UserCreate, UserUpdate, UserInDB
+from app.models.user import UserCreate, UserUpdate
 
 
-@pytest_asyncio.fixture
-async def in_memory_repository():
+@pytest.fixture
+def in_memory_repository():
     """Create an in-memory UserRepository for testing."""
-    client = AsyncMongoMockClient()
+    client = MongoClient()
     db = client.test_database
     repo = UserRepository(db)
-    await repo.initialize()
+    repo.initialize()
     return repo
 
 
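This first hunk carries the whole intent of the change: the fixture swaps mongomock_motor's AsyncMongoMockClient for mongomock's synchronous MongoClient, and every hunk that follows just strips the now-superfluous async/await plumbing. Both clients imitate the same PyMongo surface; only the calling convention differs, as this small sketch illustrates (the UserRepository lines are indicative only, since its internals are not part of this diff):

    # Before: async mock client, so every repository call had to be awaited.
    #   from mongomock_motor import AsyncMongoMockClient
    #   client = AsyncMongoMockClient()
    #   user = await UserRepository(client.test_database).find_user_by_username("alice")

    # After: plain synchronous mock client, same API without await.
    from mongomock.mongo_client import MongoClient

    client = MongoClient()                     # in-memory stand-in for pymongo
    users = client.test_database.users
    users.insert_one({"username": "alice"})
    assert users.find_one({"username": "alice"}) is not None
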
@@ -51,11 +48,10 @@ def sample_user_update():
 class TestUserRepositoryCreation:
     """Tests for user creation functionality."""
 
-    @pytest.mark.asyncio
-    async def test_i_can_create_user(self, in_memory_repository, sample_user_create):
+    def test_i_can_create_user(self, in_memory_repository, sample_user_create):
         """Test successful user creation."""
         # Act
-        created_user = await in_memory_repository.create_user(sample_user_create)
+        created_user = in_memory_repository.create_user(sample_user_create)
 
         # Assert
         assert created_user is not None
@@ -68,15 +64,14 @@ class TestUserRepositoryCreation:
         assert created_user.updated_at is not None
         assert created_user.hashed_password != sample_user_create.password  # Should be hashed
 
-    @pytest.mark.asyncio
-    async def test_i_cannot_create_user_with_duplicate_username(self, in_memory_repository, sample_user_create):
+    def test_i_cannot_create_user_with_duplicate_username(self, in_memory_repository, sample_user_create):
         """Test that creating user with duplicate username raises DuplicateKeyError."""
         # Arrange
-        await in_memory_repository.create_user(sample_user_create)
+        in_memory_repository.create_user(sample_user_create)
 
         # Act & Assert
         with pytest.raises(DuplicateKeyError) as exc_info:
-            await in_memory_repository.create_user(sample_user_create)
+            in_memory_repository.create_user(sample_user_create)
 
         assert "already exists" in str(exc_info.value)
 
@@ -84,14 +79,13 @@ class TestUserRepositoryCreation:
 class TestUserRepositoryFinding:
     """Tests for user finding functionality."""
 
-    @pytest.mark.asyncio
-    async def test_i_can_find_user_by_id(self, in_memory_repository, sample_user_create):
+    def test_i_can_find_user_by_id(self, in_memory_repository, sample_user_create):
         """Test finding user by valid ID."""
         # Arrange
-        created_user = await in_memory_repository.create_user(sample_user_create)
+        created_user = in_memory_repository.create_user(sample_user_create)
 
         # Act
-        found_user = await in_memory_repository.find_user_by_id(str(created_user.id))
+        found_user = in_memory_repository.find_user_by_id(str(created_user.id))
 
         # Assert
         assert found_user is not None
@@ -99,69 +93,63 @@ class TestUserRepositoryFinding:
         assert found_user.username == created_user.username
         assert found_user.email == created_user.email
 
-    @pytest.mark.asyncio
-    async def test_i_cannot_find_user_by_invalid_id(self, in_memory_repository):
+    def test_i_cannot_find_user_by_invalid_id(self, in_memory_repository):
         """Test that invalid ObjectId returns None."""
         # Act
-        found_user = await in_memory_repository.find_user_by_id("invalid_id")
+        found_user = in_memory_repository.find_user_by_id("invalid_id")
 
         # Assert
         assert found_user is None
 
-    @pytest.mark.asyncio
-    async def test_i_cannot_find_user_by_nonexistent_id(self, in_memory_repository):
+    def test_i_cannot_find_user_by_nonexistent_id(self, in_memory_repository):
         """Test that nonexistent but valid ObjectId returns None."""
         # Arrange
         nonexistent_id = str(ObjectId())
 
         # Act
-        found_user = await in_memory_repository.find_user_by_id(nonexistent_id)
+        found_user = in_memory_repository.find_user_by_id(nonexistent_id)
 
         # Assert
         assert found_user is None
 
-    @pytest.mark.asyncio
-    async def test_i_can_find_user_by_username(self, in_memory_repository, sample_user_create):
+    def test_i_can_find_user_by_username(self, in_memory_repository, sample_user_create):
         """Test finding user by username."""
         # Arrange
-        created_user = await in_memory_repository.create_user(sample_user_create)
+        created_user = in_memory_repository.create_user(sample_user_create)
 
         # Act
-        found_user = await in_memory_repository.find_user_by_username(sample_user_create.username)
+        found_user = in_memory_repository.find_user_by_username(sample_user_create.username)
 
         # Assert
         assert found_user is not None
         assert found_user.username == created_user.username
         assert found_user.id == created_user.id
 
-    @pytest.mark.asyncio
-    async def test_i_cannot_find_user_by_nonexistent_username(self, in_memory_repository):
+    def test_i_cannot_find_user_by_nonexistent_username(self, in_memory_repository):
         """Test that nonexistent username returns None."""
         # Act
-        found_user = await in_memory_repository.find_user_by_username("nonexistent")
+        found_user = in_memory_repository.find_user_by_username("nonexistent")
 
         # Assert
         assert found_user is None
 
-    @pytest.mark.asyncio
-    async def test_i_can_find_user_by_email(self, in_memory_repository, sample_user_create):
+    def test_i_can_find_user_by_email(self, in_memory_repository, sample_user_create):
         """Test finding user by email."""
         # Arrange
-        created_user = await in_memory_repository.create_user(sample_user_create)
+        created_user = in_memory_repository.create_user(sample_user_create)
 
         # Act
-        found_user = await in_memory_repository.find_user_by_email(str(sample_user_create.email))
+        found_user = in_memory_repository.find_user_by_email(str(sample_user_create.email))
 
         # Assert
         assert found_user is not None
         assert found_user.email == created_user.email
         assert found_user.id == created_user.id
 
-    @pytest.mark.asyncio
-    async def test_i_cannot_find_user_by_nonexistent_email(self, in_memory_repository):
+    def test_i_cannot_find_user_by_nonexistent_email(self, in_memory_repository):
         """Test that nonexistent email returns None."""
         # Act
-        found_user = await in_memory_repository.find_user_by_email("nonexistent@example.com")
+        found_user = in_memory_repository.find_user_by_email("nonexistent@example.com")
 
         # Assert
         assert found_user is None
@@ -170,15 +158,14 @@ class TestUserRepositoryFinding:
 class TestUserRepositoryUpdate:
     """Tests for user update functionality."""
 
-    @pytest.mark.asyncio
-    async def test_i_can_update_user(self, in_memory_repository, sample_user_create, sample_user_update):
+    def test_i_can_update_user(self, in_memory_repository, sample_user_create, sample_user_update):
         """Test successful user update."""
         # Arrange
-        created_user = await in_memory_repository.create_user(sample_user_create)
+        created_user = in_memory_repository.create_user(sample_user_create)
         original_updated_at = created_user.updated_at
 
         # Act
-        updated_user = await in_memory_repository.update_user(str(created_user.id), sample_user_update)
+        updated_user = in_memory_repository.update_user(str(created_user.id), sample_user_update)
 
         # Assert
         assert updated_user is not None
@@ -187,24 +174,22 @@ class TestUserRepositoryUpdate:
         assert updated_user.role == sample_user_update.role
         assert updated_user.id == created_user.id
 
-    @pytest.mark.asyncio
-    async def test_i_cannot_update_user_with_invalid_id(self, in_memory_repository, sample_user_update):
+    def test_i_cannot_update_user_with_invalid_id(self, in_memory_repository, sample_user_update):
         """Test that updating with invalid ID returns None."""
         # Act
-        result = await in_memory_repository.update_user("invalid_id", sample_user_update)
+        result = in_memory_repository.update_user("invalid_id", sample_user_update)
 
         # Assert
         assert result is None
 
-    @pytest.mark.asyncio
-    async def test_i_can_update_user_with_partial_data(self, in_memory_repository, sample_user_create):
+    def test_i_can_update_user_with_partial_data(self, in_memory_repository, sample_user_create):
         """Test updating user with partial data."""
         # Arrange
-        created_user = await in_memory_repository.create_user(sample_user_create)
+        created_user = in_memory_repository.create_user(sample_user_create)
         partial_update = UserUpdate(username="newusername")
 
         # Act
-        updated_user = await in_memory_repository.update_user(str(created_user.id), partial_update)
+        updated_user = in_memory_repository.update_user(str(created_user.id), partial_update)
 
         # Assert
         assert updated_user is not None
@@ -212,15 +197,14 @@ class TestUserRepositoryUpdate:
         assert updated_user.email == created_user.email  # Should remain unchanged
         assert updated_user.role == created_user.role  # Should remain unchanged
 
-    @pytest.mark.asyncio
-    async def test_i_can_update_user_with_empty_data(self, in_memory_repository, sample_user_create):
+    def test_i_can_update_user_with_empty_data(self, in_memory_repository, sample_user_create):
         """Test updating user with empty data returns current user."""
         # Arrange
-        created_user = await in_memory_repository.create_user(sample_user_create)
+        created_user = in_memory_repository.create_user(sample_user_create)
         empty_update = UserUpdate()
 
         # Act
-        result = await in_memory_repository.update_user(str(created_user.id), empty_update)
+        result = in_memory_repository.update_user(str(created_user.id), empty_update)
 
         # Assert
         assert result is not None
@@ -231,39 +215,36 @@ class TestUserRepositoryUpdate:
 class TestUserRepositoryDeletion:
     """Tests for user deletion functionality."""
 
-    @pytest.mark.asyncio
-    async def test_i_can_delete_user(self, in_memory_repository, sample_user_create):
+    def test_i_can_delete_user(self, in_memory_repository, sample_user_create):
         """Test successful user deletion."""
         # Arrange
-        created_user = await in_memory_repository.create_user(sample_user_create)
+        created_user = in_memory_repository.create_user(sample_user_create)
 
         # Act
-        deletion_result = await in_memory_repository.delete_user(str(created_user.id))
+        deletion_result = in_memory_repository.delete_user(str(created_user.id))
 
         # Assert
         assert deletion_result is True
 
         # Verify user is actually deleted
-        found_user = await in_memory_repository.find_user_by_id(str(created_user.id))
+        found_user = in_memory_repository.find_user_by_id(str(created_user.id))
         assert found_user is None
 
-    @pytest.mark.asyncio
-    async def test_i_cannot_delete_user_with_invalid_id(self, in_memory_repository):
+    def test_i_cannot_delete_user_with_invalid_id(self, in_memory_repository):
         """Test that deleting with invalid ID returns False."""
         # Act
-        result = await in_memory_repository.delete_user("invalid_id")
+        result = in_memory_repository.delete_user("invalid_id")
 
         # Assert
         assert result is False
 
-    @pytest.mark.asyncio
-    async def test_i_cannot_delete_nonexistent_user(self, in_memory_repository):
+    def test_i_cannot_delete_nonexistent_user(self, in_memory_repository):
         """Test that deleting nonexistent user returns False."""
         # Arrange
         nonexistent_id = str(ObjectId())
 
         # Act
-        result = await in_memory_repository.delete_user(nonexistent_id)
+        result = in_memory_repository.delete_user(nonexistent_id)
 
         # Assert
         assert result is False
@@ -272,30 +253,27 @@ class TestUserRepositoryDeletion:
 class TestUserRepositoryUtilities:
     """Tests for utility methods."""
 
-    @pytest.mark.asyncio
-    async def test_i_can_count_users(self, in_memory_repository, sample_user_create):
+    def test_i_can_count_users(self, in_memory_repository, sample_user_create):
         """Test counting users."""
         # Arrange
-        initial_count = await in_memory_repository.count_users()
-        await in_memory_repository.create_user(sample_user_create)
+        initial_count = in_memory_repository.count_users()
+        in_memory_repository.create_user(sample_user_create)
 
         # Act
-        final_count = await in_memory_repository.count_users()
+        final_count = in_memory_repository.count_users()
 
         # Assert
         assert final_count == initial_count + 1
 
-    @pytest.mark.asyncio
-    async def test_i_can_check_user_exists(self, in_memory_repository, sample_user_create):
+    def test_i_can_check_user_exists(self, in_memory_repository, sample_user_create):
         """Test checking if user exists."""
         # Arrange
-        await in_memory_repository.create_user(sample_user_create)
+        in_memory_repository.create_user(sample_user_create)
 
         # Act
-        exists = await in_memory_repository.user_exists(sample_user_create.username)
-        not_exists = await in_memory_repository.user_exists("nonexistent")
+        exists = in_memory_repository.user_exists(sample_user_create.username)
+        not_exists = in_memory_repository.user_exists("nonexistent")
 
         # Assert
         assert exists is True
         assert not_exists is False
0 tests/services/__init__.py Normal file
704 tests/services/test_document_service.py Normal file
@@ -0,0 +1,704 @@
"""
Unit tests for DocumentService using in-memory MongoDB.

Tests the orchestration logic with real MongoDB operations
using mongomock for better integration testing.
"""
import os
from datetime import datetime
from unittest.mock import patch

import pytest
from bson import ObjectId
from mongomock.mongo_client import MongoClient

from app.models.document import FileType
from app.services.document_service import DocumentService


@pytest.fixture(autouse=True)
def cleanup_test_folder():
    """Clean up test folder."""
    import shutil
    shutil.rmtree("test_folder", ignore_errors=True)


@pytest.fixture
def in_memory_database():
    """Create an in-memory database for testing."""
    client = MongoClient()
    return client.test_database


@pytest.fixture
def document_service(in_memory_database):
    """Create DocumentService with in-memory repositories."""
    service = DocumentService(in_memory_database, objects_folder="test_folder")
    return service


@pytest.fixture
def sample_file_bytes():
    """Sample file content as bytes."""
    return b"This is a test PDF content"


@pytest.fixture
def sample_text_bytes():
    """Sample text file content as bytes."""
    return b"This is a test text file content"


@pytest.fixture
def sample_file_hash():
    """Expected SHA256 hash for sample file bytes."""
    import hashlib
    return hashlib.sha256(b"This is a test PDF content").hexdigest()


def validate_file_saved(document_service, file_hash, file_bytes):
    # Verify file is saved to disk
    target_file_path = os.path.join(document_service.objects_folder, file_hash[:24], file_hash)
    assert os.path.exists(target_file_path)

    with open(target_file_path, "rb") as f:
        content = f.read()
    assert content == file_bytes
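
validate_file_saved encodes the storage layout every test below relies on: bytes live at objects_folder/<first 24 hex chars of the SHA-256>/<full hash>. The service exposes this as get_document_path (called further down in this file); a sketch of what that helper presumably reduces to, under that assumption:

    import os

    def get_document_path(objects_folder: str, file_hash: str) -> str:
        """Resolve a content hash to its on-disk path (sketch).

        Mirrors the os.path.join in validate_file_saved: the directory is
        the first 24 hex characters of the hash, the filename is the full digest.
        """
        return os.path.join(objects_folder, file_hash[:24], file_hash)
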
class TestCreateDocument:
    """Tests for create_document method."""

    @patch('app.services.document_service.magic.from_buffer')
    @patch('app.services.document_service.datetime')
    def test_i_can_create_document_with_new_content(
        self,
        mock_datetime,
        mock_magic,
        document_service,
        sample_file_bytes
    ):
        """Test creating document when content doesn't exist yet."""
        # Setup mocks
        fixed_time = datetime(2025, 1, 1, 10, 30, 0)
        mock_datetime.now.return_value = fixed_time
        mock_magic.return_value = "application/pdf"

        # Execute
        result = document_service.create_document(
            "/test/test.pdf",
            sample_file_bytes,
            "utf-8"
        )

        # Verify document creation
        assert result is not None
        assert result.filename == "test.pdf"
        assert result.filepath == "/test/test.pdf"
        assert result.file_type == FileType.PDF
        assert result.detected_at == fixed_time
        assert result.file_hash == document_service._calculate_file_hash(sample_file_bytes)

        # Verify document created in database
        doc_in_db = document_service.document_repository.find_document_by_id(result.id)
        assert doc_in_db is not None
        assert doc_in_db.id == result.id
        assert doc_in_db.filename == result.filename
        assert doc_in_db.filepath == result.filepath
        assert doc_in_db.file_type == result.file_type
        assert doc_in_db.detected_at == fixed_time
        assert doc_in_db.file_hash == result.file_hash

        # Verify file is saved to disk
        validate_file_saved(document_service, result.file_hash, sample_file_bytes)

    @patch('app.services.document_service.magic.from_buffer')
    @patch('app.services.document_service.datetime')
    def test_i_can_create_document_with_existing_content(
        self,
        mock_datetime,
        mock_magic,
        document_service,
        sample_file_bytes
    ):
        """Test creating document when content already exists (deduplication)."""
        # Setup mocks
        fixed_time = datetime(2025, 1, 1, 10, 30, 0)
        mock_datetime.now.return_value = fixed_time
        mock_magic.return_value = "application/pdf"

        # Create first document
        first_doc = document_service.create_document(
            "/test/first.pdf",
            sample_file_bytes,
            "utf-8"
        )

        # Create second document with same content
        second_doc = document_service.create_document(
            "/test/second.pdf",
            sample_file_bytes,
            "utf-8"
        )

        # Verify both documents exist but share same hash
        assert first_doc.file_hash == second_doc.file_hash
        assert first_doc.filename != second_doc.filename
        assert first_doc.filepath != second_doc.filepath
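
The deduplication test holds because identical bytes hash identically, so the second create_document call only adds a new metadata record and skips the disk write. A sketch of the guard that makes the write idempotent (the name save_content_if_needed is taken from a later call in this file; the body is an assumption):

    import os

    def save_content_if_needed(objects_folder: str, file_hash: str, file_bytes: bytes) -> None:
        """Write content to its hash-addressed path unless it is already there (sketch)."""
        target_dir = os.path.join(objects_folder, file_hash[:24])
        target_path = os.path.join(target_dir, file_hash)
        if os.path.exists(target_path):
            return  # identical content already stored; nothing to do
        os.makedirs(target_dir, exist_ok=True)
        with open(target_path, "wb") as f:
            f.write(file_bytes)
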
    def test_i_cannot_create_document_with_unsupported_file_type(
        self,
        document_service,
        sample_file_bytes
    ):
        """Test that unsupported file types raise ValueError."""
        with pytest.raises(ValueError, match="Unsupported file type"):
            document_service.create_document(
                "/test/test.xyz",  # Unsupported extension
                sample_file_bytes,
                "utf-8"
            )

    def test_i_cannot_create_document_with_empty_file_path(
        self,
        document_service,
        sample_file_bytes
    ):
        """Test that empty file path raises ValueError."""
        with pytest.raises(ValueError):
            document_service.create_document(
                "",  # Empty path
                sample_file_bytes,
                "utf-8"
            )

    @patch('app.services.document_service.magic.from_buffer')
    def test_i_can_create_document_with_empty_bytes(
        self,
        mock_magic,
        document_service
    ):
        """Test behavior with empty file bytes."""
        # Setup
        mock_magic.return_value = "text/plain"

        # Execute with empty bytes
        result = document_service.create_document(
            "/test/empty.txt",
            b"",  # Empty bytes
            "utf-8"
        )

        # Verify file is saved to disk
        validate_file_saved(document_service, result.file_hash, b"")


class TestGetMethods:
    """Tests for document retrieval methods."""

    @patch('app.services.document_service.magic.from_buffer')
    def test_i_can_get_document_by_id(
        self,
        mock_magic,
        document_service,
        sample_file_bytes
    ):
        """Test retrieving document by ID."""
        # Setup
        mock_magic.return_value = "application/pdf"

        # Create a document first
        created_doc = document_service.create_document(
            "/test/test.pdf",
            sample_file_bytes,
            "utf-8"
        )

        # Execute
        result = document_service.get_document_by_id(created_doc.id)

        # Verify
        assert result is not None
        assert result.id == created_doc.id
        assert result.filename == created_doc.filename

    @patch('app.services.document_service.magic.from_buffer')
    def test_i_can_get_document_by_hash(
        self,
        mock_magic,
        document_service,
        sample_file_bytes
    ):
        """Test retrieving document by file hash."""
        # Setup
        mock_magic.return_value = "application/pdf"

        # Create a document first
        created_doc = document_service.create_document(
            "/test/test.pdf",
            sample_file_bytes,
            "utf-8"
        )

        # Execute
        result = document_service.get_document_by_hash(created_doc.file_hash)

        # Verify
        assert result is not None
        assert result.file_hash == created_doc.file_hash
        assert result.filename == created_doc.filename

    @patch('app.services.document_service.magic.from_buffer')
    def test_i_can_get_document_by_filepath(
        self,
        mock_magic,
        document_service,
        sample_file_bytes
    ):
        """Test retrieving document by file path."""
        # Setup
        mock_magic.return_value = "application/pdf"
        test_path = "/test/unique_test.pdf"

        # Create a document first
        created_doc = document_service.create_document(
            test_path,
            sample_file_bytes,
            "utf-8"
        )

        # Execute
        result = document_service.get_document_by_filepath(test_path)

        # Verify
        assert result is not None
        assert result.filepath == test_path
        assert result.id == created_doc.id

    @patch('app.services.document_service.magic.from_buffer')
    def test_i_can_get_document_content(
        self,
        mock_magic,
        document_service,
        sample_file_bytes
    ):
        """Test retrieving document with associated content."""
        # Setup
        mock_magic.return_value = "application/pdf"

        # Create a document first
        created_doc = document_service.create_document(
            "/test/test.pdf",
            sample_file_bytes,
            "utf-8"
        )

        # Execute
        result = document_service.get_document_content_by_hash(created_doc.file_hash)

        # Verify
        assert result == sample_file_bytes

    def test_i_cannot_get_nonexistent_document_by_id(
        self,
        document_service
    ):
        """Test that nonexistent document returns None."""
        # Execute with random ObjectId
        result = document_service.get_document_by_id(ObjectId())

        # Verify
        assert result is None

    def test_i_cannot_get_nonexistent_document_by_hash(
        self,
        document_service
    ):
        """Test that nonexistent document hash returns None."""
        # Execute
        result = document_service.get_document_by_hash("nonexistent_hash")

        # Verify
        assert result is None


class TestPaginationAndCounting:
    """Tests for document listing and counting."""

    @patch('app.services.document_service.magic.from_buffer')
    def test_i_can_list_documents_with_pagination(
        self,
        mock_magic,
        document_service,
        sample_file_bytes
    ):
        """Test document listing with pagination parameters."""
        # Setup
        mock_magic.return_value = "application/pdf"

        # Create multiple documents
        for i in range(5):
            document_service.create_document(
                f"/test/test{i}.pdf",
                sample_file_bytes + bytes(str(i), 'utf-8'),  # Make each file unique
                "utf-8"
            )

        # Execute with pagination
        result = document_service.list_documents(skip=1, limit=2)

        # Verify
        assert len(result) == 2

        # Test counting
        total_count = document_service.count_documents()
        assert total_count == 5

    @patch('app.services.document_service.magic.from_buffer')
    def test_i_can_count_documents(
        self,
        mock_magic,
        document_service,
        sample_file_bytes
    ):
        """Test document counting."""
        # Setup
        mock_magic.return_value = "text/plain"

        # Initially should be 0
        initial_count = document_service.count_documents()
        assert initial_count == 0

        # Create some documents
        for i in range(3):
            document_service.create_document(
                f"/test/test{i}.txt",
                sample_file_bytes + bytes(str(i), 'utf-8'),
                "utf-8"
            )

        # Execute
        final_count = document_service.count_documents()

        # Verify
        assert final_count == 3
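
skip=1, limit=2 over five records returning exactly two results is plain cursor pagination; a sketch of how list_documents and count_documents presumably map onto PyMongo (the collection handle and default values are assumptions):

    def list_documents(collection, skip: int = 0, limit: int = 100):
        """Page through document records with a skip/limit cursor (sketch)."""
        return list(collection.find().skip(skip).limit(limit))

    def count_documents(collection) -> int:
        """Total number of document records (sketch)."""
        return collection.count_documents({})
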
class TestUpdateAndDelete:
    """Tests for document update and deletion operations."""

    @patch('app.services.document_service.magic.from_buffer')
    def test_i_can_update_document_metadata(
        self,
        mock_magic,
        document_service,
        sample_file_bytes
    ):
        """Test updating document metadata."""
        # Setup
        mock_magic.return_value = "application/pdf"

        # Create a document first
        created_doc = document_service.create_document(
            "/test/test.pdf",
            sample_file_bytes,
            "utf-8"
        )

        # Execute update
        update_data = {"metadata": {"page_count": 5}}
        result = document_service.update_document(created_doc.id, update_data)

        # Verify
        assert result is not None
        assert result.metadata.get("page_count") == 5
        assert result.filename == created_doc.filename
        assert result.filepath == created_doc.filepath
        assert result.file_hash == created_doc.file_hash
        assert result.file_type == created_doc.file_type
        assert result.metadata == update_data['metadata']

    def test_i_can_update_document_content(
        self,
        document_service,
        sample_file_bytes
    ):
        """Test updating document content with new file bytes."""
        # Create a document first
        created_doc = document_service.create_document(
            "/test/test.pdf",
            sample_file_bytes,
            "utf-8"
        )

        # Execute update
        update_data = {"file_bytes": b"this is an updated file content"}
        result = document_service.update_document(created_doc.id, update_data)

        assert result.filename == created_doc.filename
        assert result.filepath == created_doc.filepath
        assert result.file_hash != created_doc.file_hash
        assert result.file_type == created_doc.file_type
        assert result.metadata == created_doc.metadata

        # Verify file is saved to disk
        validate_file_saved(document_service, result.file_hash, b"this is an updated file content")

    @patch('app.services.document_service.magic.from_buffer')
    def test_i_can_delete_document_and_orphaned_content(
        self,
        mock_magic,
        document_service,
        sample_file_bytes
    ):
        """Test deleting document with orphaned content cleanup."""
        # Setup
        mock_magic.return_value = "application/pdf"

        # Create a document
        created_doc = document_service.create_document(
            "/test/test.pdf",
            sample_file_bytes,
            "utf-8"
        )

        # Verify content exists
        validate_file_saved(document_service, created_doc.file_hash, sample_file_bytes)

        # Execute deletion
        result = document_service.delete_document(created_doc.id)

        # Verify document and content are deleted
        assert result is True

        deleted_doc = document_service.get_document_by_id(created_doc.id)
        assert deleted_doc is None

        # Validate content is deleted (use the full hash for the filename;
        # only the directory shard is the first 24 characters)
        file_hash = created_doc.file_hash
        target_file_path = os.path.join(document_service.objects_folder, file_hash[:24], file_hash)
        assert not os.path.exists(target_file_path)

    @patch('app.services.document_service.magic.from_buffer')
    def test_i_can_delete_document_without_affecting_shared_content(
        self,
        mock_magic,
        document_service,
        sample_file_bytes
    ):
        """Test deleting document without removing shared content."""
        # Setup
        mock_magic.return_value = "application/pdf"

        # Create two documents with same content
        doc1 = document_service.create_document(
            "/test/test1.pdf",
            sample_file_bytes,
            "utf-8"
        )

        doc2 = document_service.create_document(
            "/test/test2.pdf",
            sample_file_bytes,
            "utf-8"
        )

        # They should share the same hash
        assert doc1.file_hash == doc2.file_hash

        # Delete first document
        result = document_service.delete_document(doc1.id)
        assert result is True

        # Verify first document is deleted but content still exists
        deleted_doc = document_service.get_document_by_id(doc1.id)
        assert deleted_doc is None

        remaining_doc = document_service.get_document_by_id(doc2.id)
        assert remaining_doc is not None

        validate_file_saved(document_service, doc2.file_hash, sample_file_bytes)
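
Read together, the two deletion tests define the cleanup contract: stored bytes are removed only when the deleted document was their last referrer. A sketch of that orphan check (the helper and field names are assumptions consistent with the tests):

    import os

    def delete_content_if_orphaned(documents_collection, objects_folder: str, file_hash: str) -> None:
        """Remove hash-addressed bytes once no document references them (sketch)."""
        if documents_collection.count_documents({"file_hash": file_hash}) > 0:
            return  # content is shared with another document; keep it
        target_path = os.path.join(objects_folder, file_hash[:24], file_hash)
        if os.path.exists(target_path):
            os.remove(target_path)
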
class TestHashCalculation:
    """Tests for file hash calculation utility."""

    def test_i_can_calculate_consistent_file_hash(self, document_service):
        """Test that file hash calculation is consistent."""
        test_bytes = b"Test content for hashing"

        # Calculate hash multiple times
        hash1 = document_service._calculate_file_hash(test_bytes)
        hash2 = document_service._calculate_file_hash(test_bytes)

        # Should be identical
        assert hash1 == hash2
        assert len(hash1) == 64  # SHA256 produces 64-character hex string

    def test_i_get_different_hashes_for_different_content(self, document_service):
        """Test that different content produces different hashes."""
        content1 = b"First content"
        content2 = b"Second content"

        hash1 = document_service._calculate_file_hash(content1)
        hash2 = document_service._calculate_file_hash(content2)

        assert hash1 != hash2
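
The 64-character assertion above only holds for a hex-encoded SHA-256 digest, so _calculate_file_hash is presumably a thin hashlib wrapper along these lines (a sketch, not the service's verbatim code):

    import hashlib

    def calculate_file_hash(file_bytes: bytes) -> str:
        """Deterministic SHA-256 hex digest of the raw bytes (sketch)."""
        return hashlib.sha256(file_bytes).hexdigest()

    assert len(calculate_file_hash(b"Test content for hashing")) == 64
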
class TestFileTypeDetection:
    """Tests for file type detection."""

    def test_i_can_detect_pdf_file_type(self, document_service):
        """Test PDF file type detection."""
        file_type = document_service._detect_file_type("/path/to/document.pdf")
        assert file_type == FileType.PDF

    def test_i_can_detect_txt_file_type(self, document_service):
        """Test text file type detection."""
        file_type = document_service._detect_file_type("/path/to/document.txt")
        assert file_type == FileType.TXT

    def test_i_can_detect_docx_file_type(self, document_service):
        """Test DOCX file type detection."""
        file_type = document_service._detect_file_type("/path/to/document.docx")
        assert file_type == FileType.DOCX

    def test_i_cannot_detect_unsupported_file_type(self, document_service):
        """Test unsupported file type raises ValueError."""
        with pytest.raises(ValueError, match="Unsupported file type"):
            document_service._detect_file_type("/path/to/document.xyz")
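
These four tests imply detection is keyed purely on the filename extension, with anything unknown rejected. A sketch consistent with them (FileType comes from app.models.document; the mapping table itself is an assumption, shown here with plain strings to stay self-contained):

    import os

    # Assumed extension table; the real one lives inside DocumentService.
    EXTENSION_MAP = {".pdf": "PDF", ".txt": "TXT", ".docx": "DOCX"}

    def detect_file_type(file_path: str) -> str:
        """Map a path's extension to a file type, rejecting unknown ones (sketch)."""
        ext = os.path.splitext(file_path)[1].lower()
        if ext not in EXTENSION_MAP:
            raise ValueError(f"Unsupported file type: {ext}")
        return EXTENSION_MAP[ext]
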
class TestCreatePdf:
    """Tests for create_pdf method."""

    @patch('app.services.document_service.convert_to_pdf')
    @patch('app.services.document_service.magic.from_buffer')
    def test_i_can_create_pdf_successfully(
        self,
        mock_magic,
        mock_convert_to_pdf,
        document_service,
        sample_file_bytes
    ):
        """Test creating PDF from an existing document."""
        # Setup
        mock_magic.return_value = "text/plain"

        # Create a document first
        created_doc = document_service.create_document(
            "/test/test.txt",
            sample_file_bytes,
            "utf-8"
        )

        # Mock the PDF conversion
        pdf_path = os.path.join(document_service.temp_folder, "converted.pdf")
        mock_convert_to_pdf.return_value = pdf_path

        # Write a sample PDF file that the conversion would create
        pdf_content = b"This is PDF content"
        os.makedirs(os.path.dirname(pdf_path), exist_ok=True)
        with open(pdf_path, "wb") as f:
            f.write(pdf_content)

        # Execute
        result = document_service.create_pdf(created_doc.id)

        # Verify
        assert result is True

        # Verify the PDF hash was added to the document
        updated_doc = document_service.get_document_by_id(created_doc.id)
        assert updated_doc.pdf_file_hash is not None

        # Verify the PDF content was saved
        pdf_hash = document_service._calculate_file_hash(pdf_content)
        assert updated_doc.pdf_file_hash == pdf_hash

        # Verify convert_to_pdf was called with correct arguments
        doc_path = document_service.get_document_path(created_doc.file_hash)
        mock_convert_to_pdf.assert_called_once_with(doc_path, document_service.temp_folder)

        # Verify content exists on disk
        validate_file_saved(document_service, pdf_hash, pdf_content)

    @patch('app.services.document_service.convert_to_pdf')
    @patch('app.services.document_service.magic.from_buffer')
    def test_i_can_reuse_existing_pdf(
        self,
        mock_magic,
        mock_convert_to_pdf,
        document_service,
        sample_file_bytes
    ):
        """Test that if PDF already exists, it doesn't recreate it."""
        # Setup
        mock_magic.return_value = "text/plain"

        # Create a document first
        created_doc = document_service.create_document(
            "/test/test.txt",
            sample_file_bytes,
            "utf-8"
        )

        # Create a fake PDF file and update the document
        pdf_content = b"This is PDF content"
        pdf_hash = document_service._calculate_file_hash(pdf_content)
        document_service.save_content_if_needed(pdf_hash, pdf_content)
        document_service.update_document(created_doc.id, {"pdf_file_hash": pdf_hash})

        # Execute
        result = document_service.create_pdf(created_doc.id)

        # Verify
        assert result is True

        # Verify convert_to_pdf was NOT called
        mock_convert_to_pdf.assert_not_called()

    def test_i_cannot_create_pdf_for_nonexistent_document(
        self,
        document_service
    ):
        """Test behavior when document ID doesn't exist."""
        # Execute with random ObjectId
        result = document_service.create_pdf(ObjectId())

        # Verify
        assert result is False

    @patch('app.services.document_service.magic.from_buffer')
    def test_i_cannot_create_pdf_when_file_content_missing(
        self,
        mock_magic,
        document_service,
        sample_file_bytes
    ):
        """Test behavior when file content doesn't exist."""
        # Setup
        mock_magic.return_value = "text/plain"

        # Create a document
        created_doc = document_service.create_document(
            "/test/test.txt",
            sample_file_bytes,
            "utf-8"
        )

        # Simulate missing content by removing file
        file_path = document_service.get_document_path(created_doc.file_hash)
        os.remove(file_path)

        # Execute
        result = document_service.create_pdf(created_doc.id)

        # Verify
        assert result is False
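
Taken together, the four create_pdf tests pin down a small decision tree: missing document returns False, an existing pdf_file_hash returns True without reconverting, missing source bytes return False, and otherwise the service converts, stores, and records the hash. A sketch of that orchestration (method names come from the tests; convert_to_pdf is the function mocked above, and the body is an assumption, not the service's verbatim code):

    import os
    from app.services.document_service import convert_to_pdf  # patched in the tests above

    def create_pdf(service, document_id) -> bool:
        """Ensure a PDF rendition exists for a document (sketch of the tested flow)."""
        doc = service.get_document_by_id(document_id)
        if doc is None:
            return False                        # nonexistent document
        if doc.pdf_file_hash is not None:
            return True                         # PDF already present; skip conversion
        source_path = service.get_document_path(doc.file_hash)
        if not os.path.exists(source_path):
            return False                        # source bytes missing from disk
        pdf_path = convert_to_pdf(source_path, service.temp_folder)
        with open(pdf_path, "rb") as f:
            pdf_bytes = f.read()
        pdf_hash = service._calculate_file_hash(pdf_bytes)
        service.save_content_if_needed(pdf_hash, pdf_bytes)
        service.update_document(document_id, {"pdf_file_hash": pdf_hash})
        return True
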
537 tests/services/test_job_service.py Normal file
@@ -0,0 +1,537 @@
"""
Unit tests for JobService using in-memory MongoDB.

Tests the business logic operations with real MongoDB operations
using mongomock for better integration testing.
"""

import pytest
from bson import ObjectId
from mongomock.mongo_client import MongoClient

from app.exceptions.job_exceptions import InvalidStatusTransitionError
from app.models.job import ProcessingStatus
from app.models.types import PyObjectId
from app.services.job_service import JobService


@pytest.fixture
def in_memory_database():
    """Create an in-memory database for testing."""
    client = MongoClient()
    return client.test_database


@pytest.fixture
def job_service(in_memory_database):
    """Create JobService with in-memory repositories."""
    service = JobService(in_memory_database).initialize()
    return service


@pytest.fixture
def sample_document_id():
    """Sample document ObjectId."""
    return PyObjectId()


@pytest.fixture
def sample_task_id():
    """Sample Celery task UUID."""
    return "550e8400-e29b-41d4-a716-446655440000"
class TestCreateJob:
    """Tests for create_job method."""

    def test_i_can_create_job_with_task_id(
        self,
        job_service,
        sample_document_id,
        sample_task_id
    ):
        """Test creating job with task ID."""
        # Execute
        result = job_service.create_job(sample_document_id, sample_task_id)

        # Verify job creation
        assert result is not None
        assert result.document_id == sample_document_id
        assert result.task_id == sample_task_id
        assert result.status == ProcessingStatus.PENDING
        assert result.created_at is not None
        assert result.started_at is None
        assert result.error_message is None

        # Verify job exists in database
        job_in_db = job_service.get_job_by_id(result.id)
        assert job_in_db is not None
        assert job_in_db.id == result.id
        assert job_in_db.document_id == sample_document_id
        assert job_in_db.task_id == sample_task_id
        assert job_in_db.status == ProcessingStatus.PENDING

    def test_i_can_create_job_without_task_id(
        self,
        job_service,
        sample_document_id
    ):
        """Test creating job without task ID."""
        # Execute
        result = job_service.create_job(sample_document_id)

        # Verify job creation
        assert result is not None
        assert result.document_id == sample_document_id
        assert result.task_id is None
        assert result.status == ProcessingStatus.PENDING
        assert result.created_at is not None
        assert result.started_at is None
        assert result.error_message is None


class TestGetJobMethods:
    """Tests for job retrieval methods."""

    def test_i_can_get_job_by_id(
        self,
        job_service,
        sample_document_id,
        sample_task_id
    ):
        """Test retrieving job by ID."""
        # Create a job first
        created_job = job_service.create_job(sample_document_id, sample_task_id)

        # Execute
        result = job_service.get_job_by_id(created_job.id)

        # Verify
        assert result is not None
        assert result.id == created_job.id
        assert result.document_id == created_job.document_id
        assert result.task_id == created_job.task_id
        assert result.status == created_job.status

    def test_i_can_get_jobs_by_status(
        self,
        job_service,
        sample_document_id
    ):
        """Test retrieving jobs by status."""
        # Create jobs with different statuses
        pending_job = job_service.create_job(sample_document_id, "pending-task")

        processing_job = job_service.create_job(ObjectId(), "processing-task")
        job_service.mark_job_as_started(processing_job.id)

        completed_job = job_service.create_job(ObjectId(), "completed-task")
        job_service.mark_job_as_started(completed_job.id)
        job_service.mark_job_as_completed(completed_job.id)

        # Execute - get pending jobs
        pending_results = job_service.get_jobs_by_status(ProcessingStatus.PENDING)

        # Verify
        assert len(pending_results) == 1
        assert pending_results[0].id == pending_job.id
        assert pending_results[0].status == ProcessingStatus.PENDING

        # Execute - get processing jobs
        processing_results = job_service.get_jobs_by_status(ProcessingStatus.PROCESSING)
        assert len(processing_results) == 1
        assert processing_results[0].status == ProcessingStatus.PROCESSING

        # Execute - get completed jobs
        completed_results = job_service.get_jobs_by_status(ProcessingStatus.COMPLETED)
        assert len(completed_results) == 1
        assert completed_results[0].status == ProcessingStatus.COMPLETED
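
Every mark_* test in the class below reduces to one rule: PENDING → PROCESSING → (COMPLETED | FAILED), with the two terminal states frozen. A sketch of the guard that InvalidStatusTransitionError presumably enforces (the exception's constructor keywords are inferred from the exc_info assertions, and the transition table is an assumption):

    from app.exceptions.job_exceptions import InvalidStatusTransitionError
    from app.models.job import ProcessingStatus

    # Transitions the tests allow; everything else must raise.
    ALLOWED_TRANSITIONS = {
        ProcessingStatus.PENDING: {ProcessingStatus.PROCESSING},
        ProcessingStatus.PROCESSING: {ProcessingStatus.COMPLETED, ProcessingStatus.FAILED},
        ProcessingStatus.COMPLETED: set(),   # terminal
        ProcessingStatus.FAILED: set(),      # terminal
    }

    def check_transition(current, target) -> None:
        """Raise unless current -> target is a legal job status transition (sketch)."""
        if target not in ALLOWED_TRANSITIONS[current]:
            raise InvalidStatusTransitionError(current_status=current, target_status=target)
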
class TestUpdateStatus:
    """Tests for job status transition methods."""

    def test_i_can_mark_pending_job_as_started(
        self,
        job_service,
        sample_document_id,
        sample_task_id
    ):
        """Test marking pending job as started (PENDING → PROCESSING)."""
        # Create a pending job
        created_job = job_service.create_job(sample_document_id, sample_task_id)
        assert created_job.status == ProcessingStatus.PENDING

        # Execute
        result = job_service.mark_job_as_started(created_job.id)

        # Verify status transition
        assert result is not None
        assert result.id == created_job.id
        assert result.status == ProcessingStatus.PROCESSING

        # Verify in database
        updated_job = job_service.get_job_by_id(created_job.id)
        assert updated_job.status == ProcessingStatus.PROCESSING

    def test_i_cannot_mark_processing_job_as_started(
        self,
        job_service,
        sample_document_id,
        sample_task_id
    ):
        """Test that processing job cannot be marked as started."""
        # Create and start a job
        created_job = job_service.create_job(sample_document_id, sample_task_id)
        job_service.mark_job_as_started(created_job.id)

        # Try to start it again
        with pytest.raises(InvalidStatusTransitionError) as exc_info:
            job_service.mark_job_as_started(created_job.id)

        # Verify exception details
        assert exc_info.value.current_status == ProcessingStatus.PROCESSING
        assert exc_info.value.target_status == ProcessingStatus.PROCESSING

    def test_i_cannot_mark_completed_job_as_started(
        self,
        job_service,
        sample_document_id,
        sample_task_id
    ):
        """Test that completed job cannot be marked as started."""
        # Create, start, and complete a job
        created_job = job_service.create_job(sample_document_id, sample_task_id)
        job_service.mark_job_as_started(created_job.id)
        job_service.mark_job_as_completed(created_job.id)

        # Try to start it again
        with pytest.raises(InvalidStatusTransitionError) as exc_info:
            job_service.mark_job_as_started(created_job.id)

        # Verify exception details
        assert exc_info.value.current_status == ProcessingStatus.COMPLETED
        assert exc_info.value.target_status == ProcessingStatus.PROCESSING

    def test_i_cannot_mark_failed_job_as_started(
        self,
        job_service,
        sample_document_id,
        sample_task_id
    ):
        """Test that failed job cannot be marked as started."""
        # Create, start, and fail a job
        created_job = job_service.create_job(sample_document_id, sample_task_id)
        job_service.mark_job_as_started(created_job.id)
        job_service.mark_job_as_failed(created_job.id, "Test error")

        # Try to start it again
        with pytest.raises(InvalidStatusTransitionError) as exc_info:
            job_service.mark_job_as_started(created_job.id)

        # Verify exception details
        assert exc_info.value.current_status == ProcessingStatus.FAILED
        assert exc_info.value.target_status == ProcessingStatus.PROCESSING

    def test_i_can_mark_processing_job_as_completed(
        self,
        job_service,
        sample_document_id,
        sample_task_id
    ):
        """Test marking processing job as completed (PROCESSING → COMPLETED)."""
        # Create and start a job
        created_job = job_service.create_job(sample_document_id, sample_task_id)
        job_service.mark_job_as_started(created_job.id)

        # Execute
        result = job_service.mark_job_as_completed(created_job.id)

        # Verify status transition
        assert result is not None
        assert result.id == created_job.id
        assert result.status == ProcessingStatus.COMPLETED

        # Verify in database
        updated_job = job_service.get_job_by_id(created_job.id)
        assert updated_job.status == ProcessingStatus.COMPLETED

    def test_i_cannot_mark_pending_job_as_completed(
        self,
        job_service,
        sample_document_id,
        sample_task_id
    ):
        """Test that pending job cannot be marked as completed."""
        # Create a pending job
        created_job = job_service.create_job(sample_document_id, sample_task_id)

        # Try to complete it directly
        with pytest.raises(InvalidStatusTransitionError) as exc_info:
            job_service.mark_job_as_completed(created_job.id)

        # Verify exception details
        assert exc_info.value.current_status == ProcessingStatus.PENDING
        assert exc_info.value.target_status == ProcessingStatus.COMPLETED

    def test_i_cannot_mark_completed_job_as_completed(
        self,
        job_service,
        sample_document_id,
        sample_task_id
    ):
        """Test that completed job cannot be marked as completed again."""
        # Create, start, and complete a job
        created_job = job_service.create_job(sample_document_id, sample_task_id)
        job_service.mark_job_as_started(created_job.id)
        job_service.mark_job_as_completed(created_job.id)

        # Try to complete it again
        with pytest.raises(InvalidStatusTransitionError) as exc_info:
            job_service.mark_job_as_completed(created_job.id)

        # Verify exception details
        assert exc_info.value.current_status == ProcessingStatus.COMPLETED
        assert exc_info.value.target_status == ProcessingStatus.COMPLETED

    def test_i_cannot_mark_failed_job_as_completed(
        self,
        job_service,
        sample_document_id,
        sample_task_id
    ):
        """Test that failed job cannot be marked as completed."""
        # Create, start, and fail a job
        created_job = job_service.create_job(sample_document_id, sample_task_id)
        job_service.mark_job_as_started(created_job.id)
        job_service.mark_job_as_failed(created_job.id, "Test error")

        # Try to complete it
        with pytest.raises(InvalidStatusTransitionError) as exc_info:
            job_service.mark_job_as_completed(created_job.id)

        # Verify exception details
        assert exc_info.value.current_status == ProcessingStatus.FAILED
        assert exc_info.value.target_status == ProcessingStatus.COMPLETED

    def test_i_can_mark_processing_job_as_failed_with_error_message(
        self,
        job_service,
        sample_document_id,
        sample_task_id
    ):
        """Test marking processing job as failed with error message."""
        # Create and start a job
        created_job = job_service.create_job(sample_document_id, sample_task_id)
        job_service.mark_job_as_started(created_job.id)

        error_message = "Processing failed due to invalid file format"

        # Execute
        result = job_service.mark_job_as_failed(created_job.id, error_message)

        # Verify status transition
        assert result is not None
        assert result.id == created_job.id
        assert result.status == ProcessingStatus.FAILED
        assert result.error_message == error_message

        # Verify in database
        updated_job = job_service.get_job_by_id(created_job.id)
        assert updated_job.status == ProcessingStatus.FAILED
        assert updated_job.error_message == error_message
||||||
|
def test_i_can_mark_processing_job_as_failed_without_error_message(
|
||||||
|
self,
|
||||||
|
job_service,
|
||||||
|
sample_document_id,
|
||||||
|
sample_task_id
|
||||||
|
):
|
||||||
|
"""Test marking processing job as failed without error message."""
|
||||||
|
# Create and start a job
|
||||||
|
created_job = job_service.create_job(sample_document_id, sample_task_id)
|
||||||
|
job_service.mark_job_as_started(created_job.id)
|
||||||
|
|
||||||
|
# Execute without error message
|
||||||
|
result = job_service.mark_job_as_failed(created_job.id)
|
||||||
|
|
||||||
|
# Verify status transition
|
||||||
|
assert result is not None
|
||||||
|
assert result.status == ProcessingStatus.FAILED
|
||||||
|
assert result.error_message is None
|
||||||
|
|
||||||
|
def test_i_cannot_mark_pending_job_as_failed(
|
||||||
|
self,
|
||||||
|
job_service,
|
||||||
|
sample_document_id,
|
||||||
|
sample_task_id
|
||||||
|
):
|
||||||
|
"""Test that pending job cannot be marked as failed."""
|
||||||
|
# Create a pending job
|
||||||
|
created_job = job_service.create_job(sample_document_id, sample_task_id)
|
||||||
|
|
||||||
|
# Try to fail it directly
|
||||||
|
with pytest.raises(InvalidStatusTransitionError) as exc_info:
|
||||||
|
job_service.mark_job_as_failed(created_job.id, "Test error")
|
||||||
|
|
||||||
|
# Verify exception details
|
||||||
|
assert exc_info.value.current_status == ProcessingStatus.PENDING
|
||||||
|
assert exc_info.value.target_status == ProcessingStatus.FAILED
|
||||||
|
|
||||||
|
def test_i_cannot_mark_completed_job_as_failed(
|
||||||
|
self,
|
||||||
|
job_service,
|
||||||
|
sample_document_id,
|
||||||
|
sample_task_id
|
||||||
|
):
|
||||||
|
"""Test that completed job cannot be marked as failed."""
|
||||||
|
# Create, start, and complete a job
|
||||||
|
created_job = job_service.create_job(sample_document_id, sample_task_id)
|
||||||
|
job_service.mark_job_as_started(created_job.id)
|
||||||
|
job_service.mark_job_as_completed(created_job.id)
|
||||||
|
|
||||||
|
# Try to fail it
|
||||||
|
with pytest.raises(InvalidStatusTransitionError) as exc_info:
|
||||||
|
job_service.mark_job_as_failed(created_job.id, "Test error")
|
||||||
|
|
||||||
|
# Verify exception details
|
||||||
|
assert exc_info.value.current_status == ProcessingStatus.COMPLETED
|
||||||
|
assert exc_info.value.target_status == ProcessingStatus.FAILED
|
||||||
|
|
||||||
|
def test_i_cannot_mark_failed_job_as_failed(
|
||||||
|
self,
|
||||||
|
job_service,
|
||||||
|
sample_document_id,
|
||||||
|
sample_task_id
|
||||||
|
):
|
||||||
|
"""Test that failed job cannot be marked as failed again."""
|
||||||
|
# Create, start, and fail a job
|
||||||
|
created_job = job_service.create_job(sample_document_id, sample_task_id)
|
||||||
|
job_service.mark_job_as_started(created_job.id)
|
||||||
|
job_service.mark_job_as_failed(created_job.id, "First error")
|
||||||
|
|
||||||
|
# Try to fail it again
|
||||||
|
with pytest.raises(InvalidStatusTransitionError) as exc_info:
|
||||||
|
job_service.mark_job_as_failed(created_job.id, "Second error")
|
||||||
|
|
||||||
|
# Verify exception details
|
||||||
|
assert exc_info.value.current_status == ProcessingStatus.FAILED
|
||||||
|
assert exc_info.value.target_status == ProcessingStatus.FAILED
|
||||||
|
|
||||||
|

    def test_i_can_update_job_status(
        self,
        job_service,
        sample_document_id,
        sample_task_id
    ):
        """Test updating a processing job's status directly (PROCESSING → SAVING_OBJECT)."""
        # Create and start a job
        created_job = job_service.create_job(sample_document_id, sample_task_id)
        job_service.mark_job_as_started(created_job.id)

        # Execute a direct status update
        result = job_service.update_job_status(created_job.id, ProcessingStatus.SAVING_OBJECT)

        # Verify status transition
        assert result is not None
        assert result.status == ProcessingStatus.SAVING_OBJECT
        assert result.error_message is None

class TestDeleteJob:
    """Tests for delete_job method."""

    def test_i_can_delete_existing_job(
        self,
        job_service,
        sample_document_id,
        sample_task_id
    ):
        """Test deleting an existing job."""
        # Create a job
        created_job = job_service.create_job(sample_document_id, sample_task_id)

        # Verify job exists
        job_before_delete = job_service.get_job_by_id(created_job.id)
        assert job_before_delete is not None

        # Execute deletion
        result = job_service.delete_job(created_job.id)

        # Verify deletion
        assert result is True

        # Verify job no longer exists
        deleted_job = job_service.get_job_by_id(created_job.id)
        assert deleted_job is None

    def test_i_cannot_delete_nonexistent_job(
        self,
        job_service
    ):
        """Test deleting a nonexistent job returns False."""
        # Execute deletion with random ObjectId
        result = job_service.delete_job(ObjectId())

        # Verify
        assert result is False


class TestStatusTransitionValidation:
    """Tests for status transition validation across different scenarios."""
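    # For orientation, a sketch of the transition rules these scenarios
    # exercise (the authoritative mapping lives in the job service; the
    # SAVING_OBJECT edge is taken from test_i_can_update_job_status above):
    #
    #     PENDING    -> PROCESSING
    #     PROCESSING -> COMPLETED | FAILED | SAVING_OBJECT
    #     COMPLETED  -> terminal (no further transitions)
    #     FAILED     -> terminal (no further transitions)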

    def test_valid_job_lifecycle_flow(
        self,
        job_service,
        sample_document_id,
        sample_task_id
    ):
        """Test complete valid job lifecycle: PENDING → PROCESSING → COMPLETED."""
        # Create job (PENDING)
        job = job_service.create_job(sample_document_id, sample_task_id)
        assert job.status == ProcessingStatus.PENDING

        # Start job (PENDING → PROCESSING)
        started_job = job_service.mark_job_as_started(job.id)
        assert started_job.status == ProcessingStatus.PROCESSING

        # Complete job (PROCESSING → COMPLETED)
        completed_job = job_service.mark_job_as_completed(job.id)
        assert completed_job.status == ProcessingStatus.COMPLETED

    def test_valid_job_failure_flow(
        self,
        job_service,
        sample_document_id,
        sample_task_id
    ):
        """Test valid job failure: PENDING → PROCESSING → FAILED."""
        # Create job (PENDING)
        job = job_service.create_job(sample_document_id, sample_task_id)
        assert job.status == ProcessingStatus.PENDING

        # Start job (PENDING → PROCESSING)
        started_job = job_service.mark_job_as_started(job.id)
        assert started_job.status == ProcessingStatus.PROCESSING

        # Fail job (PROCESSING → FAILED)
        failed_job = job_service.mark_job_as_failed(job.id, "Test failure")
        assert failed_job.status == ProcessingStatus.FAILED
        assert failed_job.error_message == "Test failure"

    def test_job_operations_with_empty_database(
        self,
        job_service
    ):
        """Test job operations when database is empty."""
        # Try to get nonexistent job
        result = job_service.get_job_by_id(ObjectId())
        assert result is None

        # Try to get jobs by status when none exist
        pending_jobs = job_service.get_jobs_by_status(ProcessingStatus.PENDING)
        assert pending_jobs == []

        # Try to delete nonexistent job
        delete_result = job_service.delete_job(ObjectId())
        assert delete_result is False
739
tests/services/test_user_service.py
Normal file
@@ -0,0 +1,739 @@
"""
Unit tests for UserService using in-memory MongoDB.

Tests the business logic operations with real MongoDB operations
using mongomock for better integration testing.
"""

import pytest
from bson import ObjectId
from mongomock.mongo_client import MongoClient

from app.models.auth import UserRole
from app.models.user import UserCreate, UserUpdate, UserCreateNoValidation
from app.services.user_service import UserService


@pytest.fixture
def in_memory_database():
    """Create an in-memory database for testing."""
    client = MongoClient()
    return client.test_database


@pytest.fixture
def user_service(in_memory_database):
    """Create UserService with in-memory repositories."""
    service = UserService(in_memory_database).initialize()
    return service


@pytest.fixture
def sample_user_data():
    """Sample user data for testing."""
    return {
        "username": "testuser",
        "email": "testuser@example.com",
        "password": "SecureP@ssw0rd123"
    }


@pytest.fixture
def sample_user_data_2():
    """Second sample user data for testing."""
    return {
        "username": "anotheruser",
        "email": "anotheruser@example.com",
        "password": "AnotherP@ssw0rd456"
    }


class TestCreateUser:
    """Tests for create_user method."""

    def test_i_can_create_user_with_valid_data(
        self,
        user_service,
        sample_user_data
    ):
        """Test creating user with valid data."""
        # Execute
        user_create = UserCreate(**sample_user_data)
        result = user_service.create_user(user_create)

        # Verify user creation
        assert result is not None
        assert result.username == sample_user_data["username"]
        assert result.email == sample_user_data["email"]
        assert result.hashed_password is not None
        assert result.hashed_password != sample_user_data["password"]
        assert result.role == UserRole.USER
        assert result.is_active is True
        assert result.preferences == {}
        assert result.created_at is not None
        assert result.updated_at is not None

        # Verify user exists in database
        user_in_db = user_service.get_user_by_id(str(result.id))
        assert user_in_db is not None
        assert user_in_db.id == result.id
        assert user_in_db.username == sample_user_data["username"]

    def test_i_cannot_create_user_with_duplicate_username(
        self,
        user_service,
        sample_user_data
    ):
        """Test that duplicate username raises ValueError."""
        # Create first user
        user_create = UserCreate(**sample_user_data)
        user_service.create_user(user_create)

        # Try to create user with same username but different email
        duplicate_user_data = sample_user_data.copy()
        duplicate_user_data["email"] = "different@example.com"
        duplicate_user_create = UserCreate(**duplicate_user_data)

        # Execute and verify exception
        with pytest.raises(ValueError) as exc_info:
            user_service.create_user(duplicate_user_create)

        assert "already exists" in str(exc_info.value)
        assert sample_user_data["username"] in str(exc_info.value)

    def test_i_cannot_create_user_with_duplicate_email(
        self,
        user_service,
        sample_user_data
    ):
        """Test that duplicate email raises ValueError."""
        # Create first user
        user_create = UserCreate(**sample_user_data)
        user_service.create_user(user_create)

        # Try to create user with same email but different username
        duplicate_user_data = sample_user_data.copy()
        duplicate_user_data["username"] = "differentuser"
        duplicate_user_create = UserCreate(**duplicate_user_data)

        # Execute and verify exception
        with pytest.raises(ValueError) as exc_info:
            user_service.create_user(duplicate_user_create)

        assert "already exists" in str(exc_info.value)
        assert sample_user_data["email"] in str(exc_info.value)


class TestGetUserMethods:
    """Tests for user retrieval methods."""

    def test_i_can_get_user_by_username(
        self,
        user_service,
        sample_user_data
    ):
        """Test retrieving user by username."""
        # Create a user first
        user_create = UserCreate(**sample_user_data)
        created_user = user_service.create_user(user_create)

        # Execute
        result = user_service.get_user_by_username(sample_user_data["username"])

        # Verify
        assert result is not None
        assert result.id == created_user.id
        assert result.username == sample_user_data["username"]
        assert result.email == sample_user_data["email"]

    def test_i_can_get_user_by_id(
        self,
        user_service,
        sample_user_data
    ):
        """Test retrieving user by ID."""
        # Create a user first
        user_create = UserCreate(**sample_user_data)
        created_user = user_service.create_user(user_create)

        # Execute
        result = user_service.get_user_by_id(str(created_user.id))

        # Verify
        assert result is not None
        assert result.id == created_user.id
        assert result.username == sample_user_data["username"]
        assert result.email == sample_user_data["email"]

    def test_i_can_check_user_exists(
        self,
        user_service,
        sample_user_data
    ):
        """Test checking if user exists."""
        # Initially should not exist
        assert user_service.user_exists(sample_user_data["username"]) is False

        # Create a user
        user_create = UserCreate(**sample_user_data)
        user_service.create_user(user_create)

        # Now should exist
        assert user_service.user_exists(sample_user_data["username"]) is True

    def test_i_cannot_get_nonexistent_user_by_username(
        self,
        user_service
    ):
        """Test retrieving nonexistent user by username returns None."""
        # Execute
        result = user_service.get_user_by_username("nonexistentuser")

        # Verify
        assert result is None

    def test_i_cannot_get_nonexistent_user_by_id(
        self,
        user_service
    ):
        """Test retrieving nonexistent user by ID returns None."""
        # Execute with random ObjectId
        result = user_service.get_user_by_id(str(ObjectId()))

        # Verify
        assert result is None


class TestAuthenticateUser:
    """Tests for authenticate_user method."""

    def test_i_can_authenticate_user_with_valid_credentials(
        self,
        user_service,
        sample_user_data
    ):
        """Test authenticating user with valid credentials."""
        # Create a user
        user_create = UserCreate(**sample_user_data)
        created_user = user_service.create_user(user_create)

        # Execute authentication
        result = user_service.authenticate_user(
            sample_user_data["username"],
            sample_user_data["password"]
        )

        # Verify
        assert result is not None
        assert result.id == created_user.id
        assert result.username == sample_user_data["username"]

    def test_i_cannot_authenticate_user_with_wrong_password(
        self,
        user_service,
        sample_user_data
    ):
        """Test authenticating user with wrong password returns None."""
        # Create a user
        user_create = UserCreate(**sample_user_data)
        user_service.create_user(user_create)

        # Execute authentication with wrong password
        result = user_service.authenticate_user(
            sample_user_data["username"],
            "WrongP@ssw0rd123"
        )

        # Verify
        assert result is None

    def test_i_cannot_authenticate_user_with_wrong_username(
        self,
        user_service,
        sample_user_data
    ):
        """Test authenticating user with wrong username returns None."""
        # Create a user
        user_create = UserCreate(**sample_user_data)
        user_service.create_user(user_create)

        # Execute authentication with wrong username
        result = user_service.authenticate_user(
            "wrongusername",
            sample_user_data["password"]
        )

        # Verify
        assert result is None

    def test_i_cannot_authenticate_inactive_user(
        self,
        user_service,
        sample_user_data
    ):
        """Test authenticating inactive user returns None."""
        # Create a user
        user_create = UserCreate(**sample_user_data)
        created_user = user_service.create_user(user_create)

        # Deactivate the user
        user_service.update_user(str(created_user.id), UserUpdate(is_active=False))

        # Execute authentication
        result = user_service.authenticate_user(
            sample_user_data["username"],
            sample_user_data["password"]
        )

        # Verify
        assert result is None


class TestUpdateUser:
    """Tests for update_user method."""

    def test_i_can_update_user_username(
        self,
        user_service,
        sample_user_data
    ):
        """Test updating user username."""
        # Create a user
        user_create = UserCreate(**sample_user_data)
        created_user = user_service.create_user(user_create)

        # Execute update
        new_username = "updatedusername"
        result = user_service.update_user(
            str(created_user.id),
            UserUpdate(username=new_username)
        )

        # Verify
        assert result is not None
        assert result.username == new_username

        # Verify in database
        updated_user = user_service.get_user_by_id(str(created_user.id))
        assert updated_user.username == new_username

    def test_i_can_update_user_email(
        self,
        user_service,
        sample_user_data
    ):
        """Test updating user email."""
        # Create a user
        user_create = UserCreate(**sample_user_data)
        created_user = user_service.create_user(user_create)

        # Execute update
        new_email = "newemail@example.com"
        result = user_service.update_user(
            str(created_user.id),
            UserUpdate(email=new_email)
        )

        # Verify
        assert result is not None
        assert result.email == new_email

        # Verify in database
        updated_user = user_service.get_user_by_id(str(created_user.id))
        assert updated_user.email == new_email

    def test_i_can_update_user_role(
        self,
        user_service,
        sample_user_data
    ):
        """Test updating user role."""
        # Create a user
        user_create = UserCreate(**sample_user_data)
        created_user = user_service.create_user(user_create)

        # Execute update
        result = user_service.update_user(
            str(created_user.id),
            UserUpdate(role=UserRole.ADMIN)
        )

        # Verify
        assert result is not None
        assert result.role == UserRole.ADMIN

        # Verify in database
        updated_user = user_service.get_user_by_id(str(created_user.id))
        assert updated_user.role == UserRole.ADMIN

    def test_i_can_update_user_is_active(
        self,
        user_service,
        sample_user_data
    ):
        """Test updating user is_active status."""
        # Create a user
        user_create = UserCreate(**sample_user_data)
        created_user = user_service.create_user(user_create)

        # Execute update
        result = user_service.update_user(
            str(created_user.id),
            UserUpdate(is_active=False)
        )

        # Verify
        assert result is not None
        assert result.is_active is False

        # Verify in database
        updated_user = user_service.get_user_by_id(str(created_user.id))
        assert updated_user.is_active is False

    def test_i_cannot_update_user_with_duplicate_username(
        self,
        user_service,
        sample_user_data,
        sample_user_data_2
    ):
        """Test that updating to existing username raises ValueError."""
        # Create two users
        user_create_1 = UserCreate(**sample_user_data)
        user_1 = user_service.create_user(user_create_1)

        user_create_2 = UserCreate(**sample_user_data_2)
        user_2 = user_service.create_user(user_create_2)

        # Try to update user_2 with user_1's username
        with pytest.raises(ValueError) as exc_info:
            user_service.update_user(
                str(user_2.id),
                UserUpdate(username=sample_user_data["username"])
            )

        assert "already taken" in str(exc_info.value)

    def test_i_cannot_update_user_with_duplicate_email(
        self,
        user_service,
        sample_user_data,
        sample_user_data_2
    ):
        """Test that updating to existing email raises ValueError."""
        # Create two users
        user_create_1 = UserCreate(**sample_user_data)
        user_1 = user_service.create_user(user_create_1)

        user_create_2 = UserCreate(**sample_user_data_2)
        user_2 = user_service.create_user(user_create_2)

        # Try to update user_2 with user_1's email
        with pytest.raises(ValueError) as exc_info:
            user_service.update_user(
                str(user_2.id),
                UserUpdate(email=sample_user_data["email"])
            )

        assert "already taken" in str(exc_info.value)

    def test_i_cannot_update_nonexistent_user(
        self,
        user_service
    ):
        """Test updating nonexistent user returns None."""
        # Execute update with random ObjectId
        result = user_service.update_user(
            str(ObjectId()),
            UserUpdate(username="newusername")
        )

        # Verify
        assert result is None


class TestDeleteUser:
    """Tests for delete_user method."""

    def test_i_can_delete_existing_user(
        self,
        user_service,
        sample_user_data
    ):
        """Test deleting an existing user."""
        # Create a user
        user_create = UserCreate(**sample_user_data)
        created_user = user_service.create_user(user_create)

        # Verify user exists
        user_before_delete = user_service.get_user_by_id(str(created_user.id))
        assert user_before_delete is not None

        # Execute deletion
        result = user_service.delete_user(str(created_user.id))

        # Verify deletion
        assert result is True

        # Verify user no longer exists
        deleted_user = user_service.get_user_by_id(str(created_user.id))
        assert deleted_user is None

    def test_i_cannot_delete_nonexistent_user(
        self,
        user_service
    ):
        """Test deleting a nonexistent user returns False."""
        # Execute deletion with random ObjectId
        result = user_service.delete_user(str(ObjectId()))

        # Verify
        assert result is False


class TestListAndCountMethods:
    """Tests for list_users and count_users methods."""

    def test_i_can_list_users(
        self,
        user_service,
        sample_user_data,
        sample_user_data_2
    ):
        """Test listing all users."""
        # Create multiple users
        user_create_1 = UserCreate(**sample_user_data)
        user_1 = user_service.create_user(user_create_1)

        user_create_2 = UserCreate(**sample_user_data_2)
        user_2 = user_service.create_user(user_create_2)

        # Execute
        result = user_service.list_users()

        # Verify
        assert len(result) == 2
        usernames = [user.username for user in result]
        assert sample_user_data["username"] in usernames
        assert sample_user_data_2["username"] in usernames

    def test_i_can_list_users_with_pagination(
        self,
        user_service
    ):
        """Test listing users with pagination."""
        # Create 5 users
        for i in range(5):
            user_data = UserCreateNoValidation(
                username=f"user{i}",
                email=f"user{i}@example.com",
                password="SecureP@ssw0rd123"
            )
            user_service.create_user(user_data)

        # Test skip and limit
        result_page_1 = user_service.list_users(skip=0, limit=2)
        assert len(result_page_1) == 2

        result_page_2 = user_service.list_users(skip=2, limit=2)
        assert len(result_page_2) == 2

        result_page_3 = user_service.list_users(skip=4, limit=2)
        assert len(result_page_3) == 1

        # Verify different users in each page
        page_1_usernames = [user.username for user in result_page_1]
        page_2_usernames = [user.username for user in result_page_2]
        assert page_1_usernames != page_2_usernames

    def test_i_can_count_users(
        self,
        user_service,
        sample_user_data,
        sample_user_data_2
    ):
        """Test counting users."""
        # Initially no users
        assert user_service.count_users() == 0

        # Create first user
        user_create_1 = UserCreate(**sample_user_data)
        user_service.create_user(user_create_1)
        assert user_service.count_users() == 1

        # Create second user
        user_create_2 = UserCreate(**sample_user_data_2)
        user_service.create_user(user_create_2)
        assert user_service.count_users() == 2

    def test_list_users_returns_empty_list_when_no_users(
        self,
        user_service
    ):
        """Test listing users returns empty list when no users exist."""
        # Execute
        result = user_service.list_users()

        # Verify
        assert result == []


class TestUserPreferences:
    """Tests for user preferences methods."""

    def test_i_can_get_user_preference(
        self,
        user_service,
        sample_user_data
    ):
        """Test getting user preference."""
        # Create a user with preferences
        user_create = UserCreate(**sample_user_data)
        created_user = user_service.create_user(user_create)

        # Set a preference
        user_service.set_preference(str(created_user.id), "theme", "dark")

        # Execute
        result = user_service.get_preference(str(created_user.id), "theme")

        # Verify
        assert result == "dark"

    def test_i_can_set_user_preference(
        self,
        user_service,
        sample_user_data
    ):
        """Test setting user preference."""
        # Create a user
        user_create = UserCreate(**sample_user_data)
        created_user = user_service.create_user(user_create)

        # Execute
        result = user_service.set_preference(str(created_user.id), "language", "fr")

        # Verify
        assert result is not None
        assert result.preferences.get("language") == "fr"

        # Verify in database
        updated_user = user_service.get_user_by_id(str(created_user.id))
        assert updated_user.preferences.get("language") == "fr"

    def test_i_cannot_get_preference_for_nonexistent_user(
        self,
        user_service
    ):
        """Test getting preference for nonexistent user returns None."""
        # Execute with random ObjectId
        result = user_service.get_preference(str(ObjectId()), "theme")

        # Verify
        assert result is None

    def test_i_cannot_set_preference_for_nonexistent_user(
        self,
        user_service
    ):
        """Test setting preference for nonexistent user returns None."""
        # Execute with random ObjectId
        result = user_service.set_preference(str(ObjectId()), "theme", "dark")

        # Verify
        assert result is None

    def test_get_preference_returns_none_for_nonexistent_key(
        self,
        user_service,
        sample_user_data
    ):
        """Test getting nonexistent preference key returns None."""
        # Create a user
        user_create = UserCreate(**sample_user_data)
        created_user = user_service.create_user(user_create)

        # Execute
        result = user_service.get_preference(str(created_user.id), "nonexistent_key")

        # Verify
        assert result is None


class TestUserLifecycle:
    """Tests for complete user lifecycle scenarios."""

    def test_complete_user_lifecycle(
        self,
        user_service,
        sample_user_data
    ):
        """Test complete user lifecycle: create → authenticate → update → preferences → delete."""
        # Create user
        user_create = UserCreate(**sample_user_data)
        created_user = user_service.create_user(user_create)
        assert created_user is not None
        assert created_user.username == sample_user_data["username"]

        # Authenticate user
        authenticated_user = user_service.authenticate_user(
            sample_user_data["username"],
            sample_user_data["password"]
        )
        assert authenticated_user is not None
        assert authenticated_user.id == created_user.id

        # Update user
        updated_user = user_service.update_user(
            str(created_user.id),
            UserUpdate(role=UserRole.ADMIN)
        )
        assert updated_user.role == UserRole.ADMIN

        # Set preference
        user_with_pref = user_service.set_preference(
            str(created_user.id),
            "theme",
            "dark"
        )
        assert user_with_pref.preferences.get("theme") == "dark"

        # Get preference
        pref_value = user_service.get_preference(str(created_user.id), "theme")
        assert pref_value == "dark"

        # Delete user
        delete_result = user_service.delete_user(str(created_user.id))
        assert delete_result is True

        # Verify user no longer exists
        deleted_user = user_service.get_user_by_id(str(created_user.id))
        assert deleted_user is None

    def test_user_operations_with_empty_database(
        self,
        user_service
    ):
        """Test user operations when database is empty."""
        # Try to get nonexistent user
        result = user_service.get_user_by_id(str(ObjectId()))
        assert result is None

        # Try to get user by username
        result = user_service.get_user_by_username("nonexistent")
        assert result is None

        # Try to list users
        users = user_service.list_users()
        assert users == []

        # Try to count users
        count = user_service.count_users()
        assert count == 0

        # Try to delete nonexistent user
        delete_result = user_service.delete_user(str(ObjectId()))
        assert delete_result is False

        # Try to check user existence
        exists = user_service.user_exists("nonexistent")
        assert exists is False
@@ -1,187 +0,0 @@
"""
|
|
||||||
Unit tests for MongoDB database connection module.
|
|
||||||
|
|
||||||
Tests the database connection functionality with mocking
|
|
||||||
to avoid requiring actual MongoDB instance during tests.
|
|
||||||
"""
|
|
||||||
|
|
||||||
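# The recurring pattern below, sketched: patch MongoClient where
# app.database.connection imports it, then reset that module's
# _client/_database singletons so each test starts disconnected.
#
#     with patch('app.database.connection.MongoClient', return_value=Mock()):
#         import app.database.connection
#         app.database.connection._client = None
#         app.database.connection._database = None
#         ...  # exercise get_database() / create_mongodb_client()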

import pytest
from unittest.mock import Mock, patch, MagicMock
from pymongo.errors import ConnectionFailure, ServerSelectionTimeoutError

from app.database.connection import (
    create_mongodb_client,
    get_database,
    close_database_connection,
    get_mongodb_client,
    test_database_connection
)


def test_i_can_get_database_connection():
    """Test successful database connection creation."""
    mock_client = Mock()
    mock_database = Mock()

    # Configure the mock to support dictionary-like access
    mock_client.__getitem__ = Mock(return_value=mock_database)

    with patch('app.database.connection.MongoClient', return_value=mock_client):
        with patch('app.database.connection.get_mongodb_url', return_value="mongodb://localhost:27017"):
            with patch('app.database.connection.get_mongodb_database_name', return_value="testdb"):
                # Reset global variables
                import app.database.connection
                app.database.connection._client = None
                app.database.connection._database = None

                result = get_database()

                assert result == mock_database
                mock_client.admin.command.assert_called_with('ping')
                # Verify that __getitem__ was called with the database name
                mock_client.__getitem__.assert_called_with("testdb")


def test_i_cannot_connect_to_invalid_mongodb_url():
    """Test fail-fast behavior with invalid MongoDB URL."""
    mock_client = Mock()
    mock_client.admin.command.side_effect = ConnectionFailure("Connection failed")

    with patch('app.database.connection.MongoClient', return_value=mock_client):
        with patch('app.database.connection.get_mongodb_url', return_value="mongodb://invalid:27017"):
            with pytest.raises(SystemExit) as exc_info:
                create_mongodb_client()

            assert exc_info.value.code == 1


def test_i_cannot_connect_with_server_selection_timeout():
    """Test fail-fast behavior with server selection timeout."""
    mock_client = Mock()
    mock_client.admin.command.side_effect = ServerSelectionTimeoutError("Timeout")

    with patch('app.database.connection.MongoClient', return_value=mock_client):
        with patch('app.database.connection.get_mongodb_url', return_value="mongodb://timeout:27017"):
            with pytest.raises(SystemExit) as exc_info:
                create_mongodb_client()

            assert exc_info.value.code == 1


def test_i_cannot_connect_with_unexpected_error():
    """Test fail-fast behavior with unexpected connection error."""
    with patch('app.database.connection.MongoClient', side_effect=Exception("Unexpected error")):
        with patch('app.database.connection.get_mongodb_url', return_value="mongodb://error:27017"):
            with pytest.raises(SystemExit) as exc_info:
                create_mongodb_client()

            assert exc_info.value.code == 1


def test_i_can_get_database_singleton():
    """Test that get_database returns the same instance (singleton pattern)."""
    mock_client = Mock()
    mock_database = Mock()
    mock_client.__getitem__ = Mock(return_value=mock_database)

    with patch('app.database.connection.MongoClient', return_value=mock_client):
        with patch('app.database.connection.get_mongodb_url', return_value="mongodb://localhost:27017"):
            with patch('app.database.connection.get_mongodb_database_name', return_value="testdb"):
                # Reset global variables
                import app.database.connection
                app.database.connection._client = None
                app.database.connection._database = None

                # First call
                db1 = get_database()
                # Second call
                db2 = get_database()

                assert db1 is db2
                # MongoClient should be called only once
                assert mock_client.admin.command.call_count == 1


def test_i_can_close_database_connection():
    """Test closing database connection."""
    mock_client = Mock()
    mock_database = Mock()
    mock_client.__getitem__ = Mock(return_value=mock_database)

    with patch('app.database.connection.MongoClient', return_value=mock_client):
        with patch('app.database.connection.get_mongodb_url', return_value="mongodb://localhost:27017"):
            with patch('app.database.connection.get_mongodb_database_name', return_value="testdb"):
                # Reset global variables
                import app.database.connection
                app.database.connection._client = None
                app.database.connection._database = None

                # Create connection
                get_database()

                # Close connection
                close_database_connection()

                mock_client.close.assert_called_once()
                assert app.database.connection._client is None
                assert app.database.connection._database is None


def test_i_can_get_mongodb_client():
    """Test getting raw MongoDB client instance."""
    mock_client = Mock()
    mock_database = Mock()
    mock_client.__getitem__ = Mock(return_value=mock_database)

    with patch('app.database.connection.MongoClient', return_value=mock_client):
        with patch('app.database.connection.get_mongodb_url', return_value="mongodb://localhost:27017"):
            with patch('app.database.connection.get_mongodb_database_name', return_value="testdb"):
                # Reset global variables
                import app.database.connection
                app.database.connection._client = None
                app.database.connection._database = None

                # Create connection first
                get_database()

                # Get client
                result = get_mongodb_client()

                assert result == mock_client


def test_i_can_get_none_mongodb_client_when_not_connected():
    """Test getting MongoDB client returns None when not connected."""
    # Reset global variables
    import app.database.connection
    app.database.connection._client = None
    app.database.connection._database = None

    result = get_mongodb_client()
    assert result is None


def test_i_can_test_database_connection_success():
    """Test database connection health check - success case."""
    mock_database = Mock()
    mock_database.command.return_value = True

    with patch('app.database.connection.get_database', return_value=mock_database):
        result = test_database_connection()

        assert result is True
        mock_database.command.assert_called_with('ping')


def test_i_can_close_connection_when_no_client():
    """Test closing connection when no client exists (should not raise error)."""
    # Reset global variables
    import app.database.connection
    app.database.connection._client = None
    app.database.connection._database = None

    # Should not raise any exception
    close_database_connection()

    assert app.database.connection._client is None
    assert app.database.connection._database is None
@@ -1,311 +0,0 @@
"""
|
|
||||||
Test suite for DocumentContentRepository with async/await support.
|
|
||||||
|
|
||||||
This module contains comprehensive tests for all DocumentContentRepository methods
|
|
||||||
using mongomock-motor for in-memory MongoDB testing.
|
|
||||||
"""
|
|
||||||
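# A minimal async sketch of the mongomock-motor pattern used below:
# AsyncMongoMockClient is an in-memory stand-in for motor's
# AsyncIOMotorClient, so awaitable collection calls work without a server.
#
#     client = AsyncMongoMockClient()
#     db = client.test_database
#     await db.contents.insert_one({"file_hash": "abc"})   # inside an async test
#     assert await db.contents.count_documents({}) == 1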

import pytest
import hashlib
from datetime import datetime

import pytest_asyncio
from bson import ObjectId
from pymongo.errors import DuplicateKeyError
from mongomock_motor import AsyncMongoMockClient

from app.database.repositories.document_content_repository import DocumentContentRepository
from app.models.document import DocumentContent


@pytest_asyncio.fixture
async def in_memory_repository():
    """Create an in-memory DocumentContentRepository for testing."""
    client = AsyncMongoMockClient()
    db = client.test_database
    repo = DocumentContentRepository(db)
    await repo.initialize()
    return repo


@pytest.fixture
def sample_document_content():
    """Sample DocumentContent data for testing."""
    content = "This is sample document content for testing purposes."
    file_hash = hashlib.sha256(content.encode()).hexdigest()

    return DocumentContent(
        file_hash=file_hash,
        content=content,
        encoding="utf-8",
        file_size=len(content.encode()),
        mime_type="text/plain"
    )


@pytest.fixture
def another_document_content():
    """Another sample DocumentContent data for testing."""
    content = "This is another sample document with different content."
    file_hash = hashlib.sha256(content.encode()).hexdigest()

    return DocumentContent(
        file_hash=file_hash,
        content=content,
        encoding="utf-8",
        file_size=len(content.encode()),
        mime_type="text/plain"
    )


class TestDocumentContentRepositoryCreation:
    """Tests for document content creation functionality."""

    @pytest.mark.asyncio
    async def test_i_can_create_document_content(self, in_memory_repository, sample_document_content):
        """Test successful document content creation."""
        # Act
        created_content = await in_memory_repository.create_document_content(sample_document_content)

        # Assert
        assert created_content is not None
        assert created_content.file_hash == sample_document_content.file_hash
        assert created_content.content == sample_document_content.content
        assert created_content.encoding == sample_document_content.encoding
        assert created_content.file_size == sample_document_content.file_size
        assert created_content.mime_type == sample_document_content.mime_type
        assert created_content.id is not None

    @pytest.mark.asyncio
    async def test_i_cannot_create_document_content_with_duplicate_file_hash(self, in_memory_repository,
                                                                             sample_document_content):
        """Test that creating document content with duplicate file_hash raises DuplicateKeyError."""
        # Arrange
        await in_memory_repository.create_document_content(sample_document_content)

        # Act & Assert
        with pytest.raises(DuplicateKeyError) as exc_info:
            await in_memory_repository.create_document_content(sample_document_content)

        assert "already exists" in str(exc_info.value)


class TestDocumentContentRepositoryFinding:
    """Tests for document content finding functionality."""

    @pytest.mark.asyncio
    async def test_i_can_find_document_content_by_id(self, in_memory_repository, sample_document_content):
        """Test finding document content by valid ID."""
        # Arrange
        created_content = await in_memory_repository.create_document_content(sample_document_content)

        # Act
        found_content = await in_memory_repository.find_document_content_by_id(str(created_content.id))

        # Assert
        assert found_content is not None
        assert found_content.id == created_content.id
        assert found_content.file_hash == created_content.file_hash
        assert found_content.content == created_content.content

    @pytest.mark.asyncio
    async def test_i_cannot_find_document_content_by_invalid_id(self, in_memory_repository):
        """Test that invalid ObjectId returns None."""
        # Act
        found_content = await in_memory_repository.find_document_content_by_id("invalid_id")

        # Assert
        assert found_content is None

    @pytest.mark.asyncio
    async def test_i_cannot_find_document_content_by_nonexistent_id(self, in_memory_repository):
        """Test that nonexistent but valid ObjectId returns None."""
        # Arrange
        nonexistent_id = str(ObjectId())

        # Act
        found_content = await in_memory_repository.find_document_content_by_id(nonexistent_id)

        # Assert
        assert found_content is None

    @pytest.mark.asyncio
    async def test_i_can_find_document_content_by_file_hash(self, in_memory_repository, sample_document_content):
        """Test finding document content by file hash."""
        # Arrange
        created_content = await in_memory_repository.create_document_content(sample_document_content)

        # Act
        found_content = await in_memory_repository.find_document_content_by_file_hash(sample_document_content.file_hash)

        # Assert
        assert found_content is not None
        assert found_content.file_hash == created_content.file_hash
        assert found_content.id == created_content.id

    @pytest.mark.asyncio
    async def test_i_cannot_find_document_content_by_nonexistent_file_hash(self, in_memory_repository):
        """Test that nonexistent file hash returns None."""
        # Act
        found_content = await in_memory_repository.find_document_content_by_file_hash("nonexistent_hash")

        # Assert
        assert found_content is None


class TestDocumentContentRepositoryUpdate:
    """Tests for document content update functionality."""

    @pytest.mark.asyncio
    async def test_i_can_update_document_content(self, in_memory_repository, sample_document_content):
        """Test successful document content update."""
        # Arrange
        created_content = await in_memory_repository.create_document_content(sample_document_content)
        update_data = {
            "content": "Updated content for testing",
            "encoding": "utf-16",
            "mime_type": "text/html"
        }

        # Act
        updated_content = await in_memory_repository.update_document_content(str(created_content.id), update_data)

        # Assert
        assert updated_content is not None
        assert updated_content.content == update_data["content"]
        assert updated_content.encoding == update_data["encoding"]
        assert updated_content.mime_type == update_data["mime_type"]
        assert updated_content.id == created_content.id
        assert updated_content.file_hash == created_content.file_hash  # Should remain unchanged

    @pytest.mark.asyncio
    async def test_i_cannot_update_document_content_with_invalid_id(self, in_memory_repository):
        """Test that updating with invalid ID returns None."""
        # Act
        result = await in_memory_repository.update_document_content("invalid_id", {"content": "test"})

        # Assert
        assert result is None

    @pytest.mark.asyncio
    async def test_i_can_update_document_content_with_partial_data(self, in_memory_repository, sample_document_content):
        """Test updating document content with partial data."""
        # Arrange
        created_content = await in_memory_repository.create_document_content(sample_document_content)
        partial_update = {"encoding": "iso-8859-1"}

        # Act
        updated_content = await in_memory_repository.update_document_content(str(created_content.id), partial_update)

        # Assert
        assert updated_content is not None
        assert updated_content.encoding == "iso-8859-1"
        assert updated_content.content == created_content.content  # Should remain unchanged
        assert updated_content.mime_type == created_content.mime_type  # Should remain unchanged

    @pytest.mark.asyncio
    async def test_i_can_update_document_content_with_empty_data(self, in_memory_repository, sample_document_content):
        """Test updating document content with empty data returns current content."""
        # Arrange
        created_content = await in_memory_repository.create_document_content(sample_document_content)
        empty_update = {}

        # Act
        result = await in_memory_repository.update_document_content(str(created_content.id), empty_update)

        # Assert
        assert result is not None
        assert result.content == created_content.content
        assert result.encoding == created_content.encoding
        assert result.mime_type == created_content.mime_type


class TestDocumentContentRepositoryDeletion:
    """Tests for document content deletion functionality."""

    @pytest.mark.asyncio
    async def test_i_can_delete_document_content(self, in_memory_repository, sample_document_content):
        """Test successful document content deletion."""
        # Arrange
        created_content = await in_memory_repository.create_document_content(sample_document_content)

        # Act
        deletion_result = await in_memory_repository.delete_document_content(str(created_content.id))

        # Assert
        assert deletion_result is True

        # Verify content is actually deleted
        found_content = await in_memory_repository.find_document_content_by_id(str(created_content.id))
        assert found_content is None

    @pytest.mark.asyncio
    async def test_i_cannot_delete_document_content_with_invalid_id(self, in_memory_repository):
        """Test that deleting with invalid ID returns False."""
        # Act
        result = await in_memory_repository.delete_document_content("invalid_id")

        # Assert
        assert result is False

    @pytest.mark.asyncio
    async def test_i_cannot_delete_nonexistent_document_content(self, in_memory_repository):
        """Test that deleting nonexistent document content returns False."""
        # Arrange
        nonexistent_id = str(ObjectId())

        # Act
        result = await in_memory_repository.delete_document_content(nonexistent_id)

        # Assert
        assert result is False


class TestDocumentContentRepositoryUtilities:
    """Tests for utility methods."""

    @pytest.mark.asyncio
    async def test_i_can_check_content_exists(self, in_memory_repository, sample_document_content):
        """Test checking if document content exists by file hash."""
        # Arrange
        await in_memory_repository.create_document_content(sample_document_content)

        # Act
        exists = await in_memory_repository.content_exists(sample_document_content.file_hash)
        not_exists = await in_memory_repository.content_exists("nonexistent_hash")

        # Assert
        assert exists is True
        assert not_exists is False

    @pytest.mark.asyncio
    async def test_i_can_list_document_contents(self, in_memory_repository, sample_document_content,
                                                another_document_content):
        """Test listing document contents with pagination."""
        # Arrange
        await in_memory_repository.create_document_content(sample_document_content)
        await in_memory_repository.create_document_content(another_document_content)

        # Act
        all_contents = await in_memory_repository.list_document_contents()
        limited_contents = await in_memory_repository.list_document_contents(skip=0, limit=1)

        # Assert
        assert len(all_contents) == 2
        assert len(limited_contents) == 1
        assert all(isinstance(content, DocumentContent) for content in all_contents)

    @pytest.mark.asyncio
    async def test_i_can_count_document_contents(self, in_memory_repository, sample_document_content,
                                                 another_document_content):
        """Test counting document contents."""
        # Arrange
        initial_count = await in_memory_repository.count_document_contents()
        await in_memory_repository.create_document_content(sample_document_content)
        await in_memory_repository.create_document_content(another_document_content)

        # Act
        final_count = await in_memory_repository.count_document_contents()

        # Assert
        assert final_count == initial_count + 2
Some files were not shown because too many files have changed in this diff.