Working on document repository
This commit is contained in:
@@ -0,0 +1,248 @@
|
||||
"""
|
||||
File repository for database operations on FileDocument collection.
|
||||
|
||||
This module provides data access operations for file documents stored
|
||||
in MongoDB with proper error handling and type safety.
|
||||
"""
|
||||
|
||||
from typing import Optional, List
|
||||
from bson import ObjectId
|
||||
from pymongo.errors import DuplicateKeyError, PyMongoError
|
||||
from difflib import SequenceMatcher
|
||||
from motor.motor_asyncio import AsyncIOMotorCollection
|
||||
from app.models.document import FileDocument
|
||||
from app.database.connection import get_database
|
||||
|
||||
|
||||
class FileDocumentRepository:
    """
    Repository class for file document database operations.

    Handles all database operations for FileDocument objects stored in the
    MongoDB ``files`` collection, with proper error handling and validation.
    """

    def __init__(self):
        """
        Initialize file repository with database connection.

        Index creation is asynchronous, so it is scheduled as a background
        task on the running event loop. The previous implementation called
        ``self._ensure_indexes()`` without awaiting it, so the coroutine was
        never executed (and Python emitted a "coroutine was never awaited"
        RuntimeWarning) — indexes were silently never created.
        """
        import asyncio  # local import: only needed to schedule index creation

        self.db = get_database()
        self.collection: AsyncIOMotorCollection = self.db.files
        try:
            # __init__ cannot await; run index creation on the active loop.
            asyncio.get_running_loop().create_task(self._ensure_indexes())
        except RuntimeError:
            # No running event loop (synchronous construction). Indexes must
            # then be ensured explicitly via ``await repo._ensure_indexes()``.
            pass

    async def _ensure_indexes(self):
        """
        Ensure required database indexes exist.

        Creates a unique index on the ``filepath`` field so the same file
        path cannot be registered twice.
        """
        try:
            await self.collection.create_index("filepath", unique=True)
        except PyMongoError:
            # Index might already exist, ignore error
            pass

    async def create_document(self, file_data: FileDocument) -> FileDocument:
        """
        Create a new file document in database.

        Args:
            file_data (FileDocument): File document data to create

        Returns:
            FileDocument: Created file document with database ID

        Raises:
            ValueError: If file creation fails due to a database error
            DuplicateKeyError: If a file with the same filepath already exists
        """
        try:
            file_dict = file_data.model_dump(by_alias=True, exclude_unset=True)
            # Let MongoDB assign the _id rather than inserting an explicit None.
            if "_id" in file_dict and file_dict["_id"] is None:
                del file_dict["_id"]

            result = await self.collection.insert_one(file_dict)
            file_data.id = result.inserted_id
            return file_data

        except DuplicateKeyError as e:
            # The unique index is on `filepath` (see _ensure_indexes), so a
            # duplicate key means the path — not the hash — already exists.
            raise DuplicateKeyError(f"File with same filepath already exists: {e}")
        except PyMongoError as e:
            raise ValueError(f"Failed to create file document: {e}")

    async def find_document_by_id(self, file_id: str) -> Optional[FileDocument]:
        """
        Find file document by ID.

        Args:
            file_id (str): File document ID to search for

        Returns:
            FileDocument or None: File document if found, None otherwise
        """
        try:
            # Reject malformed ids up front instead of letting ObjectId raise.
            if not ObjectId.is_valid(file_id):
                return None

            file_doc = await self.collection.find_one({"_id": ObjectId(file_id)})
            if file_doc:
                return FileDocument(**file_doc)
            return None

        except PyMongoError:
            return None

    async def find_document_by_hash(self, file_hash: str) -> Optional[FileDocument]:
        """
        Find file document by file hash to detect duplicates.

        NOTE(review): no index is created on ``file_hash`` here, so this
        lookup likely performs a collection scan — consider adding an index
        if duplicate detection is frequent.

        Args:
            file_hash (str): SHA256 hash of file content

        Returns:
            FileDocument or None: File document if found, None otherwise
        """
        try:
            file_doc = await self.collection.find_one({"file_hash": file_hash})
            if file_doc:
                return FileDocument(**file_doc)
            return None

        except PyMongoError:
            return None

    async def find_document_by_filepath(self, filepath: str) -> Optional[FileDocument]:
        """
        Find file document by exact filepath (backed by the unique index).

        Args:
            filepath (str): Full path to the file

        Returns:
            FileDocument or None: File document if found, None otherwise
        """
        try:
            file_doc = await self.collection.find_one({"filepath": filepath})
            if file_doc:
                return FileDocument(**file_doc)
            return None

        except PyMongoError:
            return None

    async def find_document_by_name(self, filename: str, similarity_threshold: float = 0.6) -> List[FileDocument]:
        """
        Find file documents by filename using fuzzy matching.

        NOTE(review): this loads the entire collection into memory to score
        every document client-side — O(n) in collection size. Acceptable for
        small collections; revisit (e.g. MongoDB text search) if it grows.

        Args:
            filename (str): Filename to search for
            similarity_threshold (float): Minimum similarity ratio (0.0 to 1.0)

        Returns:
            List[FileDocument]: List of matching files sorted by similarity score
        """
        try:
            # Get all files from database
            cursor = self.collection.find({})
            all_files = await cursor.to_list(length=None)

            matches = []
            for file_doc in all_files:
                file_obj = FileDocument(**file_doc)
                # Calculate similarity between search term and filename
                similarity = SequenceMatcher(None, filename.lower(), file_obj.filename.lower()).ratio()

                if similarity >= similarity_threshold:
                    matches.append((file_obj, similarity))

            # Sort by similarity score (highest first)
            matches.sort(key=lambda x: x[1], reverse=True)

            # Return only the FileDocument objects
            return [match[0] for match in matches]

        except PyMongoError:
            return []

    async def list_documents(self, skip: int = 0, limit: int = 100) -> List[FileDocument]:
        """
        List file documents with pagination, newest detection first.

        Args:
            skip (int): Number of documents to skip (default: 0)
            limit (int): Maximum number of documents to return (default: 100)

        Returns:
            List[FileDocument]: List of file documents
        """
        try:
            cursor = self.collection.find({}).skip(skip).limit(limit).sort("detected_at", -1)
            file_docs = await cursor.to_list(length=limit)
            return [FileDocument(**doc) for doc in file_docs]

        except PyMongoError:
            return []

    async def count_documents(self) -> int:
        """
        Count total number of file documents.

        Returns:
            int: Total number of file documents in collection (0 on DB error)
        """
        try:
            return await self.collection.count_documents({})
        except PyMongoError:
            return 0

    async def update_document(self, file_id: str, update_data: dict) -> Optional[FileDocument]:
        """
        Update file document with new data.

        Args:
            file_id (str): File document ID to update
            update_data (dict): Fields to update; keys with None values are
                ignored rather than unsetting the field

        Returns:
            FileDocument or None: Updated file document if successful, None otherwise
        """
        try:
            if not ObjectId.is_valid(file_id):
                return None

            # Remove None values from update data
            clean_update_data = {k: v for k, v in update_data.items() if v is not None}

            # Nothing to change: return the current document unchanged.
            if not clean_update_data:
                return await self.find_document_by_id(file_id)

            result = await self.collection.find_one_and_update(
                {"_id": ObjectId(file_id)},
                {"$set": clean_update_data},
                return_document=True  # return the post-update document
            )

            if result:
                return FileDocument(**result)
            return None

        except PyMongoError:
            return None

    async def delete_document(self, file_id: str) -> bool:
        """
        Delete file document from database.

        Args:
            file_id (str): File document ID to delete

        Returns:
            bool: True if file was deleted, False otherwise
        """
        try:
            if not ObjectId.is_valid(file_id):
                return False

            result = await self.collection.delete_one({"_id": ObjectId(file_id)})
            return result.deleted_count > 0

        except PyMongoError:
            return False
|
||||
@@ -2,15 +2,14 @@
|
||||
User repository for MongoDB operations.
|
||||
|
||||
This module implements the repository pattern for user CRUD operations
|
||||
with dependency injection of the database connection.
|
||||
with dependency injection of the database connection using async/await.
|
||||
"""
|
||||
|
||||
from typing import Optional, List, Dict, Any
|
||||
from typing import Optional, List
|
||||
from datetime import datetime
|
||||
from bson import ObjectId
|
||||
from pymongo.database import Database
|
||||
from pymongo.errors import DuplicateKeyError
|
||||
from pymongo.collection import Collection
|
||||
from motor.motor_asyncio import AsyncIOMotorDatabase, AsyncIOMotorCollection
|
||||
from pymongo.errors import DuplicateKeyError, PyMongoError
|
||||
|
||||
from app.models.user import UserCreate, UserInDB, UserUpdate
|
||||
from app.utils.security import hash_password
|
||||
@@ -21,35 +20,33 @@ class UserRepository:
|
||||
Repository class for user CRUD operations in MongoDB.
|
||||
|
||||
This class handles all database operations related to users,
|
||||
following the repository pattern with dependency injection.
|
||||
following the repository pattern with dependency injection and async/await.
|
||||
"""
|
||||
|
||||
def __init__(self, database: Database):
|
||||
def __init__(self, database: AsyncIOMotorDatabase):
|
||||
"""
|
||||
Initialize repository with database dependency.
|
||||
|
||||
Args:
|
||||
database (Database): MongoDB database instance
|
||||
database (AsyncIOMotorDatabase): MongoDB database instance
|
||||
"""
|
||||
self.db = database
|
||||
self.collection: Collection = database.users
|
||||
|
||||
# Create unique index on username for duplicate prevention
|
||||
self.collection: AsyncIOMotorCollection = database.users
|
||||
self._ensure_indexes()
|
||||
|
||||
def _ensure_indexes(self):
|
||||
async def _ensure_indexes(self):
|
||||
"""
|
||||
Ensure required database indexes exist.
|
||||
|
||||
Creates unique index on username field to prevent duplicates.
|
||||
"""
|
||||
try:
|
||||
self.collection.create_index("username", unique=True)
|
||||
except Exception:
|
||||
await self.collection.create_index("username", unique=True)
|
||||
except PyMongoError:
|
||||
# Index might already exist, ignore error
|
||||
pass
|
||||
|
||||
def create_user(self, user_data: UserCreate) -> UserInDB:
|
||||
async def create_user(self, user_data: UserCreate) -> UserInDB:
|
||||
"""
|
||||
Create a new user in the database.
|
||||
|
||||
@@ -61,6 +58,7 @@ class UserRepository:
|
||||
|
||||
Raises:
|
||||
DuplicateKeyError: If username already exists
|
||||
ValueError: If user creation fails due to validation
|
||||
"""
|
||||
user_dict = {
|
||||
"username": user_data.username,
|
||||
@@ -73,13 +71,15 @@ class UserRepository:
|
||||
}
|
||||
|
||||
try:
|
||||
result = self.collection.insert_one(user_dict)
|
||||
result = await self.collection.insert_one(user_dict)
|
||||
user_dict["_id"] = result.inserted_id
|
||||
return UserInDB(**user_dict)
|
||||
except DuplicateKeyError:
|
||||
raise DuplicateKeyError(f"User with username '{user_data.username}' already exists")
|
||||
except DuplicateKeyError as e:
|
||||
raise DuplicateKeyError(f"User with username '{user_data.username}' already exists: {e}")
|
||||
except PyMongoError as e:
|
||||
raise ValueError(f"Failed to create user: {e}")
|
||||
|
||||
def find_user_by_username(self, username: str) -> Optional[UserInDB]:
|
||||
async def find_user_by_username(self, username: str) -> Optional[UserInDB]:
|
||||
"""
|
||||
Find user by username.
|
||||
|
||||
@@ -89,12 +89,15 @@ class UserRepository:
|
||||
Returns:
|
||||
UserInDB or None: User if found, None otherwise
|
||||
"""
|
||||
user_doc = self.collection.find_one({"username": username})
|
||||
if user_doc:
|
||||
return UserInDB(**user_doc)
|
||||
return None
|
||||
try:
|
||||
user_doc = await self.collection.find_one({"username": username})
|
||||
if user_doc:
|
||||
return UserInDB(**user_doc)
|
||||
return None
|
||||
except PyMongoError:
|
||||
return None
|
||||
|
||||
def find_user_by_id(self, user_id: str) -> Optional[UserInDB]:
|
||||
async def find_user_by_id(self, user_id: str) -> Optional[UserInDB]:
|
||||
"""
|
||||
Find user by ID.
|
||||
|
||||
@@ -105,16 +108,17 @@ class UserRepository:
|
||||
UserInDB or None: User if found, None otherwise
|
||||
"""
|
||||
try:
|
||||
object_id = ObjectId(user_id)
|
||||
user_doc = self.collection.find_one({"_id": object_id})
|
||||
if not ObjectId.is_valid(user_id):
|
||||
return None
|
||||
|
||||
user_doc = await self.collection.find_one({"_id": ObjectId(user_id)})
|
||||
if user_doc:
|
||||
return UserInDB(**user_doc)
|
||||
except Exception:
|
||||
# Invalid ObjectId format
|
||||
pass
|
||||
return None
|
||||
return None
|
||||
except PyMongoError:
|
||||
return None
|
||||
|
||||
def find_user_by_email(self, email: str) -> Optional[UserInDB]:
|
||||
async def find_user_by_email(self, email: str) -> Optional[UserInDB]:
|
||||
"""
|
||||
Find user by email address.
|
||||
|
||||
@@ -124,12 +128,15 @@ class UserRepository:
|
||||
Returns:
|
||||
UserInDB or None: User if found, None otherwise
|
||||
"""
|
||||
user_doc = self.collection.find_one({"email": email})
|
||||
if user_doc:
|
||||
return UserInDB(**user_doc)
|
||||
return None
|
||||
try:
|
||||
user_doc = await self.collection.find_one({"email": email})
|
||||
if user_doc:
|
||||
return UserInDB(**user_doc)
|
||||
return None
|
||||
except PyMongoError:
|
||||
return None
|
||||
|
||||
def update_user(self, user_id: str, user_update: UserUpdate) -> Optional[UserInDB]:
|
||||
async def update_user(self, user_id: str, user_update: UserUpdate) -> Optional[UserInDB]:
|
||||
"""
|
||||
Update user information.
|
||||
|
||||
@@ -141,11 +148,12 @@ class UserRepository:
|
||||
UserInDB or None: Updated user if found, None otherwise
|
||||
"""
|
||||
try:
|
||||
object_id = ObjectId(user_id)
|
||||
if not ObjectId.is_valid(user_id):
|
||||
return None
|
||||
|
||||
# Build update document with only provided fields
|
||||
update_data = {"updated_at": datetime.now()}
|
||||
|
||||
|
||||
if user_update.username is not None:
|
||||
update_data["username"] = user_update.username
|
||||
if user_update.email is not None:
|
||||
@@ -157,20 +165,26 @@ class UserRepository:
|
||||
if user_update.is_active is not None:
|
||||
update_data["is_active"] = user_update.is_active
|
||||
|
||||
result = self.collection.update_one(
|
||||
{"_id": object_id},
|
||||
{"$set": update_data}
|
||||
# Remove None values from update data
|
||||
clean_update_data = {k: v for k, v in update_data.items() if v is not None}
|
||||
|
||||
if not clean_update_data:
|
||||
return await self.find_user_by_id(user_id)
|
||||
|
||||
result = await self.collection.find_one_and_update(
|
||||
{"_id": ObjectId(user_id)},
|
||||
{"$set": clean_update_data},
|
||||
return_document=True
|
||||
)
|
||||
|
||||
if result.matched_count > 0:
|
||||
return self.find_user_by_id(user_id)
|
||||
if result:
|
||||
return UserInDB(**result)
|
||||
return None
|
||||
|
||||
except Exception:
|
||||
# Invalid ObjectId format or other errors
|
||||
pass
|
||||
return None
|
||||
except PyMongoError:
|
||||
return None
|
||||
|
||||
def delete_user(self, user_id: str) -> bool:
|
||||
async def delete_user(self, user_id: str) -> bool:
|
||||
"""
|
||||
Delete user from database.
|
||||
|
||||
@@ -181,14 +195,15 @@ class UserRepository:
|
||||
bool: True if user was deleted, False otherwise
|
||||
"""
|
||||
try:
|
||||
object_id = ObjectId(user_id)
|
||||
result = self.collection.delete_one({"_id": object_id})
|
||||
if not ObjectId.is_valid(user_id):
|
||||
return False
|
||||
|
||||
result = await self.collection.delete_one({"_id": ObjectId(user_id)})
|
||||
return result.deleted_count > 0
|
||||
except Exception:
|
||||
# Invalid ObjectId format
|
||||
except PyMongoError:
|
||||
return False
|
||||
|
||||
def list_users(self, skip: int = 0, limit: int = 100) -> List[UserInDB]:
|
||||
async def list_users(self, skip: int = 0, limit: int = 100) -> List[UserInDB]:
|
||||
"""
|
||||
List users with pagination.
|
||||
|
||||
@@ -199,19 +214,26 @@ class UserRepository:
|
||||
Returns:
|
||||
List[UserInDB]: List of users
|
||||
"""
|
||||
cursor = self.collection.find().skip(skip).limit(limit)
|
||||
return [UserInDB(**user_doc) for user_doc in cursor]
|
||||
try:
|
||||
cursor = self.collection.find({}).skip(skip).limit(limit).sort("created_at", -1)
|
||||
user_docs = await cursor.to_list(length=limit)
|
||||
return [UserInDB(**user_doc) for user_doc in user_docs]
|
||||
except PyMongoError:
|
||||
return []
|
||||
|
||||
def count_users(self) -> int:
|
||||
async def count_users(self) -> int:
|
||||
"""
|
||||
Count total number of users.
|
||||
|
||||
Returns:
|
||||
int: Total number of users in database
|
||||
"""
|
||||
return self.collection.count_documents({})
|
||||
try:
|
||||
return await self.collection.count_documents({})
|
||||
except PyMongoError:
|
||||
return 0
|
||||
|
||||
def user_exists(self, username: str) -> bool:
|
||||
async def user_exists(self, username: str) -> bool:
|
||||
"""
|
||||
Check if user exists by username.
|
||||
|
||||
@@ -221,4 +243,8 @@ class UserRepository:
|
||||
Returns:
|
||||
bool: True if user exists, False otherwise
|
||||
"""
|
||||
return self.collection.count_documents({"username": username}) > 0
|
||||
try:
|
||||
count = await self.collection.count_documents({"username": username})
|
||||
return count > 0
|
||||
except PyMongoError:
|
||||
return False
|
||||
|
||||
@@ -119,7 +119,6 @@ async def create_user(
|
||||
):
|
||||
return user_service.create_user(user_data)
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
async def health_check():
|
||||
"""
|
||||
|
||||
142
src/file-processor/app/models/document.py
Normal file
142
src/file-processor/app/models/document.py
Normal file
@@ -0,0 +1,142 @@
|
||||
"""
|
||||
Pydantic models for document processing collections.
|
||||
|
||||
This module defines the data models for file documents and processing jobs
|
||||
stored in MongoDB collections.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from bson import ObjectId
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
|
||||
from app.models.types import PyObjectId
|
||||
|
||||
|
||||
class FileType(str, Enum):
    """Supported file types for document processing.

    Values are the lowercase file extensions. Subclassing ``str`` means
    members compare equal to their extension string (e.g.
    ``FileType.PDF == "pdf"``) and serialize as plain strings.
    """

    TXT = "txt"
    PDF = "pdf"
    DOCX = "docx"
    JPG = "jpg"
    PNG = "png"
|
||||
|
||||
|
||||
class ExtractionMethod(str, Enum):
    """Methods used to extract content from documents.

    ``str``-valued so members serialize as plain strings in MongoDB/JSON.
    """

    DIRECT_TEXT = "direct_text"  # text read directly from the file
    OCR = "ocr"                  # optical character recognition
    HYBRID = "hybrid"            # presumably combines both — confirm with processor
|
||||
|
||||
|
||||
class ProcessingStatus(str, Enum):
    """Status values for processing jobs.

    Lifecycle appears to be PENDING -> PROCESSING -> COMPLETED/FAILED
    (inferred from the names and ProcessingJob's timestamp fields —
    confirm against the job-runner code).
    """

    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"
|
||||
|
||||
|
||||
class FileDocument(BaseModel):
    """
    Model for file documents stored in the 'files' collection.

    Represents a file detected in the watched directory with its
    metadata and extracted content.
    """

    id: Optional[PyObjectId] = Field(default=None, alias="_id")
    filename: str = Field(..., description="Original filename")
    filepath: str = Field(..., description="Full path to the file")
    file_type: FileType = Field(..., description="Type of the file")
    extraction_method: Optional[ExtractionMethod] = Field(default=None, description="Method used to extract content")
    metadata: Dict[str, Any] = Field(default_factory=dict, description="File-specific metadata")
    detected_at: Optional[datetime] = Field(default=None, description="Timestamp when file was detected")
    file_hash: Optional[str] = Field(default=None, description="SHA256 hash of file content")

    @field_validator('filename', 'filepath')
    @classmethod
    def validate_non_empty(cls, v: str, info) -> str:
        """Reject blank values and strip surrounding whitespace.

        Consolidates the two previously duplicated per-field validators.
        The error message is derived from the field name, so it is
        unchanged: "Filename cannot be empty" / "Filepath cannot be empty".
        """
        cleaned = v.strip()
        if not cleaned:
            raise ValueError(f"{info.field_name.capitalize()} cannot be empty")
        return cleaned

    class Config:
        """Pydantic configuration."""
        populate_by_name = True        # allow construction with either `id` or `_id`
        arbitrary_types_allowed = True  # required for the ObjectId-based PyObjectId
        json_encoders = {ObjectId: str}
|
||||
|
||||
|
||||
class DocumentContent(BaseModel):
    """
    Model for extracted document content.

    Stores the text content of a processed file together with basic size
    and type metadata; ``file_hash`` ties the content back to the
    originating file record.
    """

    id: Optional[PyObjectId] = Field(default=None, alias="_id")
    # Fixed: the field was annotated Optional[str] yet declared required
    # (Field(...)), which is contradictory. Default it to None so the
    # annotation and constraint agree (matches FileDocument.file_hash);
    # existing callers that pass a hash are unaffected.
    file_hash: Optional[str] = Field(default=None, description="SHA256 hash of file content")
    content: str = Field(..., description="File content")
    encoding: str = Field(default="utf-8", description="Character encoding for text files")
    file_size: int = Field(..., ge=0, description="File size in bytes")
    mime_type: str = Field(..., description="MIME type detected")

    class Config:
        """Pydantic configuration (consistent with the other models here)."""
        populate_by_name = True
        arbitrary_types_allowed = True
        json_encoders = {ObjectId: str}
|
||||
|
||||
|
||||
class ProcessingJob(BaseModel):
    """
    Model for processing jobs stored in the 'processing_jobs' collection.

    Tracks the lifecycle and status of document processing tasks.
    Timestamps mark the lifecycle transitions: created -> started ->
    completed; ``error_message`` is populated on failure.
    """

    id: Optional[PyObjectId] = Field(default=None, alias="_id")
    # References FileDocument._id in the 'files' collection.
    file_id: PyObjectId = Field(..., description="Reference to file document")
    status: ProcessingStatus = Field(
        default=ProcessingStatus.PENDING,
        description="Current processing status"
    )
    task_id: Optional[str] = Field(
        default=None,
        description="Celery task UUID"
    )
    created_at: Optional[datetime] = Field(
        default=None,
        description="Timestamp when job was created"
    )
    started_at: Optional[datetime] = Field(
        default=None,
        description="Timestamp when processing started"
    )
    completed_at: Optional[datetime] = Field(
        default=None,
        description="Timestamp when processing completed"
    )
    error_message: Optional[str] = Field(
        default=None,
        description="Error message if processing failed"
    )

    @field_validator('error_message')
    @classmethod
    def validate_error_message(cls, v: Optional[str]) -> Optional[str]:
        """Clean up error message.

        Strips surrounding whitespace and normalizes whitespace-only
        messages to None so "no error" is always represented the same way.
        """
        if v is not None:
            return v.strip() if v.strip() else None
        return v

    class Config:
        """Pydantic configuration."""
        # Allow construction with either the field name `id` or alias `_id`.
        populate_by_name = True
        # Required for the ObjectId-based PyObjectId fields.
        arbitrary_types_allowed = True
        json_encoders = {ObjectId: str}
|
||||
32
src/file-processor/app/models/types.py
Normal file
32
src/file-processor/app/models/types.py
Normal file
@@ -0,0 +1,32 @@
|
||||
from typing import Any
|
||||
|
||||
from bson import ObjectId
|
||||
from pydantic_core import core_schema
|
||||
|
||||
|
||||
class PyObjectId(ObjectId):
    """Custom ObjectId type for Pydantic v2 compatibility.

    Accepts either an existing ``ObjectId`` instance or a string holding a
    valid ObjectId; always serializes to ``str``.
    """

    @classmethod
    def __get_pydantic_core_schema__(
        cls, source_type: Any, handler
    ) -> core_schema.CoreSchema:
        """Build the pydantic-core validation/serialization schema."""
        return core_schema.json_or_python_schema(
            # In JSON, an ObjectId is represented as its hex string.
            json_schema=core_schema.str_schema(),
            python_schema=core_schema.union_schema([
                # Already an ObjectId: accept as-is.
                core_schema.is_instance_schema(ObjectId),
                # Otherwise require a string, then validate/convert it.
                core_schema.chain_schema([
                    core_schema.str_schema(),
                    core_schema.no_info_plain_validator_function(cls.validate),
                ])
            ]),
            serialization=core_schema.plain_serializer_function_ser_schema(
                str  # `str` itself is the serializer; `lambda x: str(x)` was redundant
            ),
        )

    @classmethod
    def validate(cls, v):
        """Validate *v* as an ObjectId and convert it.

        Raises:
            ValueError: If *v* is not a valid ObjectId representation.
        """
        if not ObjectId.is_valid(v):
            raise ValueError("Invalid ObjectId")
        return ObjectId(v)
|
||||
@@ -13,34 +13,7 @@ from pydantic import BaseModel, Field, field_validator, EmailStr
|
||||
from pydantic_core import core_schema
|
||||
|
||||
from app.models.auth import UserRole
|
||||
|
||||
|
||||
class PyObjectId(ObjectId):
|
||||
"""Custom ObjectId type for Pydantic v2 compatibility."""
|
||||
|
||||
@classmethod
|
||||
def __get_pydantic_core_schema__(
|
||||
cls, source_type: Any, handler
|
||||
) -> core_schema.CoreSchema:
|
||||
return core_schema.json_or_python_schema(
|
||||
json_schema=core_schema.str_schema(),
|
||||
python_schema=core_schema.union_schema([
|
||||
core_schema.is_instance_schema(ObjectId),
|
||||
core_schema.chain_schema([
|
||||
core_schema.str_schema(),
|
||||
core_schema.no_info_plain_validator_function(cls.validate),
|
||||
])
|
||||
]),
|
||||
serialization=core_schema.plain_serializer_function_ser_schema(
|
||||
lambda x: str(x)
|
||||
),
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def validate(cls, v):
|
||||
if not ObjectId.is_valid(v):
|
||||
raise ValueError("Invalid ObjectId")
|
||||
return ObjectId(v)
|
||||
from app.models.types import PyObjectId
|
||||
|
||||
|
||||
def validate_password_strength(password: str) -> str:
|
||||
|
||||
@@ -3,6 +3,7 @@ celery==5.5.3
|
||||
email-validator==2.3.0
|
||||
fastapi==0.116.1
|
||||
httptools==0.6.4
|
||||
motor==3.7.1
|
||||
pymongo==4.15.0
|
||||
pydantic==2.11.9
|
||||
redis==6.4.0
|
||||
|
||||
Reference in New Issue
Block a user