Implemented default pipeline
@@ -3,6 +3,12 @@ FROM python:3.12-slim
# Set working directory
WORKDIR /app

+# Install libmagic
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    libmagic1 \
+    file \
+    && rm -rf /var/lib/apt/lists/*
+
# Copy requirements and install dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
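libmagic is the native library behind the python-magic package, presumably installed here to back content-type detection. A minimal sketch of the kind of call that depends on it (assuming python-magic is listed in requirements.txt, which the diff does not show):

import magic  # python-magic binds the libmagic1 shared library installed above

with open("sample.pdf", "rb") as f:
    mime = magic.from_buffer(f.read(2048), mime=True)
print(mime)  # e.g. "application/pdf"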
src/file-processor/app/api/__init__.py (new file, 0 lines)
src/file-processor/app/api/dependencies.py (new file, 100 lines)
@@ -0,0 +1,100 @@
import jwt
from fastapi import Depends, HTTPException
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from jwt import InvalidTokenError
from starlette import status

from app.config import settings
from app.database.connection import get_database
from app.models.auth import UserRole
from app.models.user import UserInDB
from app.services.auth_service import AuthService
from app.services.user_service import UserService

security = HTTPBearer()


def get_auth_service() -> AuthService:
    """Dependency to get AuthService instance."""
    return AuthService()


def get_user_service() -> UserService:
    """Dependency to get UserService instance."""
    database = get_database()
    return UserService(database)


def get_current_user(
    credentials: HTTPAuthorizationCredentials = Depends(security),
    user_service: UserService = Depends(get_user_service)
) -> UserInDB:
    """
    Dependency to get current authenticated user from JWT token.

    Args:
        credentials: HTTP Bearer credentials
        user_service: User service instance

    Returns:
        User: Current authenticated user

    Raises:
        HTTPException: If token is invalid or user not found
    """
    try:
        payload = jwt.decode(
            credentials.credentials,
            settings.get_jwt_secret_key(),
            algorithms=[settings.get_jwt_algorithm()]
        )
        username: str = payload.get("sub")
        if username is None:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Could not validate credentials",
                headers={"WWW-Authenticate": "Bearer"},
            )
    except InvalidTokenError:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Could not validate credentials",
            headers={"WWW-Authenticate": "Bearer"},
        )

    user = user_service.get_user_by_username(username)
    if user is None:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Could not validate credentials",
            headers={"WWW-Authenticate": "Bearer"},
        )

    if not user.is_active:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Inactive user"
        )

    return user


def get_admin_user(current_user: UserInDB = Depends(get_current_user)) -> UserInDB:
    """
    Dependency to ensure current user has admin role.

    Args:
        current_user: Current authenticated user

    Returns:
        User: Current user if admin

    Raises:
        HTTPException: If user is not admin
    """
    if current_user.role != UserRole.ADMIN:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Not enough permissions"
        )
    return current_user
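For orientation, a minimal sketch of how these dependencies attach to a route; the router and paths below are illustrative, not part of this commit:

from fastapi import APIRouter, Depends

from app.api.dependencies import get_admin_user, get_current_user
from app.models.user import UserInDB

example_router = APIRouter(tags=["example"])  # hypothetical router, for illustration only


@example_router.get("/whoami")
def whoami(current_user: UserInDB = Depends(get_current_user)):
    # get_current_user decodes the bearer token and loads the active user
    return {"username": current_user.username}


@example_router.get("/admin-check")
def admin_check(admin: UserInDB = Depends(get_admin_user)):
    # get_admin_user chains get_current_user and adds the role check
    return {"role": str(admin.role)}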
src/file-processor/app/api/routes/__init__.py (new file, 0 lines)
src/file-processor/app/api/routes/auth.py (new file, 80 lines)
@@ -0,0 +1,80 @@
from fastapi import APIRouter, Depends, HTTPException, status
from fastapi.security import OAuth2PasswordRequestForm

from app.api.dependencies import get_auth_service, get_current_user, get_user_service
from app.models.auth import LoginResponse, UserResponse
from app.models.user import UserInDB
from app.services.auth_service import AuthService
from app.services.user_service import UserService

router = APIRouter(tags=["authentication"])


@router.post("/login", response_model=LoginResponse)
def login(
    form_data: OAuth2PasswordRequestForm = Depends(),
    auth_service: AuthService = Depends(get_auth_service),
    user_service: UserService = Depends(get_user_service)
):
    """
    Authenticate user and return JWT token.

    Args:
        form_data: OAuth2 password form data
        auth_service: Auth service instance
        user_service: User service instance

    Returns:
        LoginResponse: JWT token and user info

    Raises:
        HTTPException: If authentication fails
    """
    incorrect_username_or_pwd = HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Incorrect username or password",
        headers={"WWW-Authenticate": "Bearer"},
    )

    user = user_service.get_user_by_username(form_data.username)
    if (not user or
            not user.is_active or
            not auth_service.verify_user_password(form_data.password, user.hashed_password)):
        raise incorrect_username_or_pwd

    access_token = auth_service.create_access_token(data={"sub": user.username})

    return LoginResponse(
        access_token=access_token,
        user=UserResponse(
            _id=user.id,
            username=user.username,
            email=user.email,
            role=user.role,
            is_active=user.is_active,
            created_at=user.created_at,
            updated_at=user.updated_at
        )
    )


@router.get("/me", response_model=UserResponse)
def get_current_user_profile(current_user: UserInDB = Depends(get_current_user)):
    """
    Get current user profile.

    Args:
        current_user: Current authenticated user

    Returns:
        UserResponse: Current user profile without sensitive data
    """
    return UserResponse(
        _id=current_user.id,
        username=current_user.username,
        email=current_user.email,
        role=current_user.role,
        is_active=current_user.is_active,
        created_at=current_user.created_at,
        updated_at=current_user.updated_at
    )
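A client-side sketch of the login flow, assuming these routes are mounted so that they resolve to /auth/login and /auth/me on localhost:8000 (the mount points and host are assumptions, not shown in this commit):

import requests

# OAuth2PasswordRequestForm expects form-encoded fields, not JSON
resp = requests.post(
    "http://localhost:8000/auth/login",
    data={"username": "admin", "password": "secret"},
)
token = resp.json()["access_token"]

me = requests.get(
    "http://localhost:8000/auth/me",
    headers={"Authorization": f"Bearer {token}"},
)
print(me.json()["username"])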
src/file-processor/app/api/routes/users.py (new file, 172 lines)
@@ -0,0 +1,172 @@
from fastapi import APIRouter, Depends, HTTPException
from starlette import status

from app.api.dependencies import get_admin_user, get_user_service
from app.models.auth import UserResponse, MessageResponse
from app.models.types import PyObjectId
from app.models.user import UserInDB, UserCreate, UserUpdate
from app.services.user_service import UserService

router = APIRouter(tags=["users"])


@router.get("", response_model=list[UserInDB])
def list_users(
    admin_user: UserInDB = Depends(get_admin_user),
    user_service: UserService = Depends(get_user_service)
):
    """
    List all users (admin only).

    Args:
        admin_user: Current admin user
        user_service: User service instance

    Returns:
        list[UserInDB]: List of all users
    """
    return user_service.list_users()


@router.get("/{user_id}", response_model=UserResponse)
def get_user_by_id(
    user_id: PyObjectId,
    admin_user: UserInDB = Depends(get_admin_user),
    user_service: UserService = Depends(get_user_service)
):
    """
    Get specific user by ID (admin only).

    Args:
        user_id: User ID to retrieve
        admin_user: Current admin user
        user_service: User service instance

    Returns:
        UserResponse: User information without sensitive data

    Raises:
        HTTPException: If user not found
    """
    user = user_service.get_user_by_id(str(user_id))
    if not user:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="User not found"
        )

    return user


@router.post("", response_model=UserResponse, status_code=status.HTTP_201_CREATED)
def create_user(
    user_data: UserCreate,
    admin_user: UserInDB = Depends(get_admin_user),
    user_service: UserService = Depends(get_user_service)
):
    """
    Create new user (admin only).

    Args:
        user_data: User creation data
        admin_user: Current admin user
        user_service: User service instance

    Returns:
        UserResponse: Created user information without sensitive data

    Raises:
        HTTPException: If user creation fails
    """
    try:
        user = user_service.create_user(user_data)
        return UserResponse(
            _id=user.id,
            username=user.username,
            email=user.email,
            role=user.role,
            is_active=user.is_active,
            created_at=user.created_at,
            updated_at=user.updated_at
        )
    except ValueError as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(e)
        )


@router.put("/{user_id}", response_model=UserResponse)
def update_user(
    user_id: PyObjectId,
    user_data: UserUpdate,
    admin_user: UserInDB = Depends(get_admin_user),
    user_service: UserService = Depends(get_user_service)
):
    """
    Update existing user (admin only).

    Args:
        user_id: User ID to update
        user_data: User update data
        admin_user: Current admin user
        user_service: User service instance

    Returns:
        UserResponse: Updated user information without sensitive data

    Raises:
        HTTPException: If user not found or update fails
    """
    try:
        user = user_service.update_user(str(user_id), user_data)
        if not user:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="User not found"
            )

        return UserResponse(
            _id=user.id,
            username=user.username,
            email=user.email,
            role=user.role,
            is_active=user.is_active,
            created_at=user.created_at,
            updated_at=user.updated_at
        )
    except ValueError as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(e)
        )


@router.delete("/{user_id}", response_model=MessageResponse)
def delete_user(
    user_id: PyObjectId,
    admin_user: UserInDB = Depends(get_admin_user),
    user_service: UserService = Depends(get_user_service)
):
    """
    Delete user by ID (admin only).

    Args:
        user_id: User ID to delete
        admin_user: Current admin user
        user_service: User service instance

    Returns:
        MessageResponse: Success message

    Raises:
        HTTPException: If user not found or deletion fails
    """
    success = user_service.delete_user(str(user_id))
    if not success:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="User not found"
        )

    return MessageResponse(message="User successfully deleted")
@@ -6,7 +6,6 @@ using simple os.getenv() approach without external validation libraries.
"""

import os
from typing import Optional


def get_mongodb_url() -> str:
@@ -31,6 +30,26 @@ def get_mongodb_database_name() -> str:
    return os.getenv("MONGODB_DATABASE", "mydocmanager")


+def get_redis_url() -> str:
+    return os.getenv("REDIS_URL", "redis://localhost:6379/0")
+
+
+# def get_redis_host() -> str:
+#     redis_url = get_redis_url()
+#     if redis_url.startswith("redis://"):
+#         return redis_url.split("redis://")[1].split("/")[0]
+#     else:
+#         return redis_url
+#
+#
+# def get_redis_port() -> int:
+#     redis_url = get_redis_url()
+#     if redis_url.startswith("redis://"):
+#         return int(redis_url.split("redis://")[1].split("/")[0].split(":")[1])
+#     else:
+#         return int(redis_url.split(":")[1])


def get_jwt_secret_key() -> str:
    """
    Get JWT secret key from environment variables.
@@ -82,4 +101,19 @@ def is_development_environment() -> bool:
    Returns:
        bool: True if development environment
    """
    return os.getenv("ENVIRONMENT", "development").lower() == "development"
+
+
+def get_objects_folder() -> str:
+    """
+    Get Vault path from environment variables.
+
+    Returns:
+        str: Vault path
+    """
+    return os.getenv("OBJECTS_FOLDER", "/objects")
+
+
+def watch_directory() -> str:
+    """Directory to monitor for new files"""
+    return os.getenv("WATCH_DIRECTORY", "/watched_files")
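The commented-out helpers above derive host and port by string splitting, which breaks on URLs with credentials or a missing port. If they are ever revived, a sketch along these lines (an assumption, not part of the commit) would be more robust:

from urllib.parse import urlsplit


def get_redis_host_port(redis_url: str) -> tuple[str, int]:
    # urlsplit handles scheme, optional credentials, and optional port uniformly
    parts = urlsplit(redis_url)
    return parts.hostname or "localhost", parts.port or 6379


# get_redis_host_port("redis://localhost:6379/0") -> ("localhost", 6379)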
@@ -7,6 +7,7 @@ The application will terminate if MongoDB is not accessible at startup.

import sys
from typing import Optional

from pymongo import MongoClient
+from pymongo.database import Database
from pymongo.errors import ConnectionFailure, ServerSelectionTimeoutError
@@ -107,6 +108,15 @@ def get_mongodb_client() -> Optional[MongoClient]:
    return _client


+def get_extra_args(session):
+    # Build kwargs only if session is provided
+    kwargs = {}
+    if session is not None:
+        kwargs["session"] = session
+
+    return kwargs


def test_database_connection() -> bool:
    """
    Test if database connection is working.
@@ -122,4 +132,4 @@ def test_database_connection() -> bool:
        db.command('ping')
        return True
    except Exception:
        return False
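get_extra_args lets repository methods forward an optional session to PyMongo only when one is provided, so call sites stay identical inside and outside transactions. An illustrative sketch, with client and db assumed to exist:

# Inside a transaction every write passes the session through get_extra_args;
# outside one, the same call sites receive no extra kwargs at all.
with client.start_session() as session:
    with session.start_transaction():
        db.files.insert_one({"x": 1}, **get_extra_args(session))
        db.processing_jobs.insert_one({"y": 2}, **get_extra_args(session))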
@@ -1,214 +0,0 @@
from typing import List, Optional
from datetime import datetime
from motor.motor_asyncio import AsyncIOMotorDatabase, AsyncIOMotorCollection
from pymongo.errors import DuplicateKeyError, PyMongoError
from bson import ObjectId

from app.models.document import DocumentContent


class DocumentContentRepository:
    """
    Repository class for document content CRUD operations in MongoDB.

    This class handles all database operations related to document content,
    following the repository pattern with dependency injection and async/await.
    """

    def __init__(self, database: AsyncIOMotorDatabase):
        """
        Initialize repository with database dependency.

        Args:
            database (AsyncIOMotorDatabase): MongoDB database instance
        """
        self.db = database
        self.collection: AsyncIOMotorCollection = database.document_contents
        self._ensure_indexes()

    async def initialize(self):
        """
        Initialize repository by ensuring required indexes exist.

        Should be called after repository instantiation to setup database indexes.
        """
        await self._ensure_indexes()

    async def _ensure_indexes(self):
        """
        Ensure required database indexes exist.

        Creates unique index on file_hash field to prevent duplicates.
        """
        try:
            await self.collection.create_index("file_hash", unique=True)
        except PyMongoError:
            # Index might already exist, ignore error
            pass

    async def create_document_content(self, document_content: DocumentContent) -> DocumentContent:
        """
        Create a new document content in the database.

        Args:
            document_content (DocumentContent): Document content data

        Returns:
            DocumentContent: Created document content with database ID

        Raises:
            DuplicateKeyError: If file_hash already exists
            ValueError: If document content creation fails due to validation
        """
        document_dict = document_content.model_dump(by_alias=True, exclude_unset=True)

        # Remove _id if it's None to let MongoDB generate it
        if document_dict.get("_id") is None:
            document_dict.pop("_id", None)

        try:
            result = await self.collection.insert_one(document_dict)
            document_dict["_id"] = result.inserted_id
            return DocumentContent(**document_dict)
        except DuplicateKeyError as e:
            raise DuplicateKeyError(f"Document content with file_hash '{document_content.file_hash}' already exists: {e}")
        except PyMongoError as e:
            raise ValueError(f"Failed to create document content: {e}")

    async def find_document_content_by_id(self, document_id: str) -> Optional[DocumentContent]:
        """
        Find document content by ID.

        Args:
            document_id (str): Document content ID to search for

        Returns:
            DocumentContent or None: Document content if found, None otherwise
        """
        try:
            if not ObjectId.is_valid(document_id):
                return None

            document_doc = await self.collection.find_one({"_id": ObjectId(document_id)})
            if document_doc:
                return DocumentContent(**document_doc)
            return None
        except PyMongoError:
            return None

    async def find_document_content_by_file_hash(self, file_hash: str) -> Optional[DocumentContent]:
        """
        Find document content by file hash.

        Args:
            file_hash (str): File hash to search for

        Returns:
            DocumentContent or None: Document content if found, None otherwise
        """
        try:
            document_doc = await self.collection.find_one({"file_hash": file_hash})
            if document_doc:
                return DocumentContent(**document_doc)
            return None
        except PyMongoError:
            return None

    async def content_exists(self, file_hash: str) -> bool:
        """
        Check if document content exists by file hash.

        Args:
            file_hash (str): File hash to check

        Returns:
            bool: True if document content exists, False otherwise
        """
        try:
            count = await self.collection.count_documents({"file_hash": file_hash})
            return count > 0
        except PyMongoError:
            return False

    async def update_document_content(self, document_id: str, update_data: dict) -> Optional[DocumentContent]:
        """
        Update document content information.

        Args:
            document_id (str): Document content ID to update
            update_data (dict): Updated document content data

        Returns:
            DocumentContent or None: Updated document content if found, None otherwise
        """
        try:
            if not ObjectId.is_valid(document_id):
                return None

            # Remove None values and _id from update data
            clean_update_data = {k: v for k, v in update_data.items() if v is not None and k != "_id"}

            if not clean_update_data:
                return await self.find_document_content_by_id(document_id)

            result = await self.collection.find_one_and_update(
                {"_id": ObjectId(document_id)},
                {"$set": clean_update_data},
                return_document=True
            )

            if result:
                return DocumentContent(**result)
            return None

        except PyMongoError:
            return None

    async def delete_document_content(self, document_id: str) -> bool:
        """
        Delete document content from database.

        Args:
            document_id (str): Document content ID to delete

        Returns:
            bool: True if document content was deleted, False otherwise
        """
        try:
            if not ObjectId.is_valid(document_id):
                return False

            result = await self.collection.delete_one({"_id": ObjectId(document_id)})
            return result.deleted_count > 0
        except PyMongoError:
            return False

    async def list_document_contents(self, skip: int = 0, limit: int = 100) -> List[DocumentContent]:
        """
        List document contents with pagination.

        Args:
            skip (int): Number of document contents to skip (default: 0)
            limit (int): Maximum number of document contents to return (default: 100)

        Returns:
            List[DocumentContent]: List of document contents
        """
        try:
            cursor = self.collection.find({}).skip(skip).limit(limit).sort("_id", -1)
            document_docs = await cursor.to_list(length=limit)
            return [DocumentContent(**document_doc) for document_doc in document_docs]
        except PyMongoError:
            return []

    async def count_document_contents(self) -> int:
        """
        Count total number of document contents.

        Returns:
            int: Total number of document contents in database
        """
        try:
            return await self.collection.count_documents({})
        except PyMongoError:
            return 0
@@ -6,9 +6,13 @@ in MongoDB with proper error handling and type safety.
"""

from typing import Optional, List

from bson import ObjectId
+from pymongo.collection import Collection
+from pymongo.database import Database
from pymongo.errors import DuplicateKeyError, PyMongoError
-from motor.motor_asyncio import AsyncIOMotorCollection, AsyncIOMotorDatabase

+from app.database.connection import get_extra_args
from app.models.document import FileDocument
from app.utils.document_matching import fuzzy_matching, subsequence_matching

@@ -34,52 +38,49 @@ class FileDocumentRepository:
    with proper error handling and data validation.
    """

-    def __init__(self, database: AsyncIOMotorDatabase):
+    def __init__(self, database: Database):
        """Initialize file repository with database connection."""
        self.db = database
-        self.collection: AsyncIOMotorCollection = self.db.files
-        self._ensure_indexes()
+        self.collection: Collection = self.db.documents

-    async def initialize(self):
+    def initialize(self):
        """
        Initialize repository by ensuring required indexes exist.

        Should be called after repository instantiation to setup database indexes.
        """
-        await self._ensure_indexes()
+        self._ensure_indexes()
+        return self

-    async def _ensure_indexes(self):
+    def _ensure_indexes(self):
        """
        Ensure required database indexes exist.

        Creates unique index on filepath field to prevent duplicates.
        """
        try:
-            await self.collection.create_index("filepath", unique=True)
+            self.collection.create_index("filepath", unique=True)
        except PyMongoError:
            # Index might already exist, ignore error
            pass

-    async def create_document(self, file_data: FileDocument) -> FileDocument:
+    def create_document(self, file_data: FileDocument, session=None) -> FileDocument:
        """
        Create a new file document in database.

        Args:
            file_data (FileDocument): File document data to create
+            session (ClientSession, optional): MongoDB session

        Returns:
-            FileDocument: Created file document with database ID
+            FileDocument: Created document with database ID

        Raises:
            ValueError: If file creation fails due to validation
-            DuplicateKeyError: If file with same hash already exists
+            DuplicateKeyError: If a document with same hash already exists
        """
        try:
            file_dict = file_data.model_dump(by_alias=True, exclude_unset=True)
            if "_id" in file_dict and file_dict["_id"] is None:
                del file_dict["_id"]

-            result = await self.collection.insert_one(file_dict)
+            result = self.collection.insert_one(file_dict, **get_extra_args(session))
            file_data.id = result.inserted_id
            return file_data

@@ -88,7 +89,7 @@ class FileDocumentRepository:
        except PyMongoError as e:
            raise ValueError(f"Failed to create file document: {e}")

-    async def find_document_by_id(self, file_id: str) -> Optional[FileDocument]:
+    def find_document_by_id(self, file_id: str) -> Optional[FileDocument]:
        """
        Find file document by ID.

@@ -102,7 +103,7 @@ class FileDocumentRepository:
            if not ObjectId.is_valid(file_id):
                return None

-            file_doc = await self.collection.find_one({"_id": ObjectId(file_id)})
+            file_doc = self.collection.find_one({"_id": ObjectId(file_id)})
            if file_doc:
                return FileDocument(**file_doc)
            return None
@@ -110,7 +111,7 @@ class FileDocumentRepository:
        except PyMongoError:
            return None

-    async def find_document_by_hash(self, file_hash: str) -> Optional[FileDocument]:
+    def find_document_by_hash(self, file_hash: str) -> Optional[FileDocument]:
        """
        Find file document by file hash to detect duplicates.

@@ -121,7 +122,7 @@ class FileDocumentRepository:
            FileDocument or None: File document if found, None otherwise
        """
        try:
-            file_doc = await self.collection.find_one({"file_hash": file_hash})
+            file_doc = self.collection.find_one({"file_hash": file_hash})
            if file_doc:
                return FileDocument(**file_doc)
            return None
@@ -129,7 +130,7 @@ class FileDocumentRepository:
        except PyMongoError:
            return None

-    async def find_document_by_filepath(self, filepath: str) -> Optional[FileDocument]:
+    def find_document_by_filepath(self, filepath: str) -> Optional[FileDocument]:
        """
        Find file document by exact filepath.

@@ -140,7 +141,7 @@ class FileDocumentRepository:
            FileDocument or None: File document if found, None otherwise
        """
        try:
-            file_doc = await self.collection.find_one({"filepath": filepath})
+            file_doc = self.collection.find_one({"filepath": filepath})
            if file_doc:
                return FileDocument(**file_doc)
            return None
@@ -148,7 +149,7 @@ class FileDocumentRepository:
        except PyMongoError:
            return None

-    async def find_document_by_name(self, filename: str, matching_method: MatchMethodBase = None) -> List[FileDocument]:
+    def find_document_by_name(self, filename: str, matching_method: MatchMethodBase = None) -> List[FileDocument]:
        """
        Find file documents by filename using fuzzy matching.

@@ -162,8 +163,7 @@ class FileDocumentRepository:
        try:
            # Get all files from database
            cursor = self.collection.find({})
-            all_files = await cursor.to_list(length=None)
-            all_documents = [FileDocument(**file_doc) for file_doc in all_files]
+            all_documents = [FileDocument(**file_doc) for file_doc in cursor]

            if isinstance(matching_method, FuzzyMatching):
                return fuzzy_matching(filename, all_documents, matching_method.threshold)
@@ -173,7 +173,7 @@ class FileDocumentRepository:
        except PyMongoError:
            return []

-    async def list_documents(self, skip: int = 0, limit: int = 100) -> List[FileDocument]:
+    def list_documents(self, skip: int = 0, limit: int = 100) -> List[FileDocument]:
        """
        List file documents with pagination.

@@ -186,13 +186,12 @@ class FileDocumentRepository:
        """
        try:
            cursor = self.collection.find({}).skip(skip).limit(limit).sort("detected_at", -1)
-            file_docs = await cursor.to_list(length=limit)
-            return [FileDocument(**doc) for doc in file_docs]
+            return [FileDocument(**doc) for doc in cursor]

        except PyMongoError:
            return []

-    async def count_documents(self) -> int:
+    def count_documents(self) -> int:
        """
        Count total number of file documents.

@@ -200,17 +199,18 @@ class FileDocumentRepository:
            int: Total number of file documents in collection
        """
        try:
-            return await self.collection.count_documents({})
+            return self.collection.count_documents({})
        except PyMongoError:
            return 0

-    async def update_document(self, file_id: str, update_data: dict) -> Optional[FileDocument]:
+    def update_document(self, file_id: str, update_data: dict, session=None) -> Optional[FileDocument]:
        """
        Update file document with new data.

        Args:
            file_id (str): File document ID to update
            update_data (dict): Fields to update
+            session (ClientSession, optional): MongoDB session

        Returns:
            FileDocument or None: Updated file document if successful, None otherwise
@@ -223,12 +223,13 @@ class FileDocumentRepository:
            clean_update_data = {k: v for k, v in update_data.items() if v is not None}

            if not clean_update_data:
-                return await self.find_document_by_id(file_id)
+                return self.find_document_by_id(file_id)

-            result = await self.collection.find_one_and_update(
+            result = self.collection.find_one_and_update(
                {"_id": ObjectId(file_id)},
                {"$set": clean_update_data},
-                return_document=True
+                return_document=True,
+                **get_extra_args(session)
            )

            if result:
@@ -238,12 +239,13 @@ class FileDocumentRepository:
        except PyMongoError:
            return None

-    async def delete_document(self, file_id: str) -> bool:
+    def delete_document(self, file_id: str, session=None) -> bool:
        """
        Delete file document from database.

        Args:
            file_id (str): File document ID to delete
+            session (ClientSession, optional): MongoDB session

        Returns:
            bool: True if file was deleted, False otherwise
@@ -252,7 +254,7 @@ class FileDocumentRepository:
            if not ObjectId.is_valid(file_id):
                return False

-            result = await self.collection.delete_one({"_id": ObjectId(file_id)})
+            result = self.collection.delete_one({"_id": ObjectId(file_id)}, **get_extra_args(session))
            return result.deleted_count > 0

        except PyMongoError:
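A hedged sketch of using the new session parameter to make two repository writes atomic; transactions require a MongoDB replica set, and the client, repo, doc objects and the "status" field here are assumed for illustration:

# Illustrative only: atomic create + update through the session kwargs.
with client.start_session() as session:
    with session.start_transaction():
        created = repo.create_document(doc, session=session)
        repo.update_document(str(created.id), {"status": "queued"}, session=session)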
src/file-processor/app/database/repositories/job_repository.py (new file, 230 lines)
@@ -0,0 +1,230 @@
"""
Repository for managing processing jobs in MongoDB.

This module provides data access layer for ProcessingJob operations
with automatic timestamp management and error handling.
"""

from datetime import datetime
from typing import List, Optional

from pymongo.collection import Collection
from pymongo.database import Database
from pymongo.errors import PyMongoError

from app.exceptions.job_exceptions import JobRepositoryError
from app.models.job import ProcessingJob, ProcessingStatus
from app.models.types import PyObjectId


class JobRepository:
    """
    Repository for processing job data access operations.

    Provides CRUD operations for ProcessingJob documents with automatic
    timestamp management and proper error handling.
    """

    def __init__(self, database: Database):
        """Initialize repository with MongoDB collection reference."""
        self.db = database
        self.collection: Collection = self.db.processing_jobs

    def _ensure_indexes(self):
        """
        Ensure required database indexes exist.

        Creates unique index on document_id field to prevent duplicates.
        """
        try:
            self.collection.create_index("document_id", unique=True)
        except PyMongoError:
            # Index might already exist, ignore error
            pass

    def initialize(self):
        """
        Initialize repository by ensuring required indexes exist.

        Should be called after repository instantiation to setup database indexes.
        """
        self._ensure_indexes()
        return self

    def create_job(self, document_id: PyObjectId, task_id: Optional[str] = None) -> ProcessingJob:
        """
        Create a new processing job.

        Args:
            document_id: Reference to the document record
            task_id: Optional Celery task UUID

        Returns:
            The created ProcessingJob

        Raises:
            JobRepositoryError: If database operation fails
        """
        try:
            job_data = {
                "document_id": document_id,
                "status": ProcessingStatus.PENDING,
                "task_id": task_id,
                "created_at": datetime.now(),
                "started_at": None,
                "completed_at": None,
                "error_message": None
            }

            result = self.collection.insert_one(job_data)
            job_data["_id"] = result.inserted_id

            return ProcessingJob(**job_data)

        except PyMongoError as e:
            raise JobRepositoryError("create_job", e)

    def find_job_by_id(self, job_id: PyObjectId) -> Optional[ProcessingJob]:
        """
        Retrieve a job by its ID.

        Args:
            job_id: The job ObjectId

        Returns:
            The ProcessingJob document, or None if not found

        Raises:
            JobRepositoryError: If database operation fails
        """
        try:
            job_data = self.collection.find_one({"_id": job_id})
            if job_data:
                return ProcessingJob(**job_data)

            return None

        except PyMongoError as e:
            raise JobRepositoryError("get_job_by_id", e)

    def update_job_status(
        self,
        job_id: PyObjectId,
        status: ProcessingStatus,
        error_message: Optional[str] = None
    ) -> Optional[ProcessingJob]:
        """
        Update job status with automatic timestamp management.

        Args:
            job_id: The job ObjectId
            status: New processing status
            error_message: Optional error message for failed jobs

        Returns:
            The updated ProcessingJob, or None if not found

        Raises:
            JobRepositoryError: If database operation fails
        """
        try:
            # Prepare update data
            update_data = {"status": status}

            # Set appropriate timestamp based on status
            current_time = datetime.now()
            if status == ProcessingStatus.PROCESSING:
                update_data["started_at"] = current_time
            elif status in (ProcessingStatus.COMPLETED, ProcessingStatus.FAILED):
                update_data["completed_at"] = current_time

            # Add error message if provided
            if error_message is not None:
                update_data["error_message"] = error_message

            result = self.collection.find_one_and_update(
                {"_id": job_id},
                {"$set": update_data},
                return_document=True
            )

            if result:
                return ProcessingJob(**result)

            return None

        except PyMongoError as e:
            raise JobRepositoryError("update_job_status", e)

    def delete_job(self, job_id: PyObjectId) -> bool:
        """
        Delete a job from the database.

        Args:
            job_id: The job ObjectId

        Returns:
            True if job was deleted, False if not found

        Raises:
            JobRepositoryError: If database operation fails
        """
        try:
            result = self.collection.delete_one({"_id": job_id})

            return result.deleted_count > 0

        except PyMongoError as e:
            raise JobRepositoryError("delete_job", e)

    def find_jobs_by_document_id(self, document_id: PyObjectId) -> List[ProcessingJob]:
        """
        Retrieve all jobs for a specific document.

        Args:
            document_id: The document ObjectId

        Returns:
            List of ProcessingJob documents

        Raises:
            JobRepositoryError: If database operation fails
        """
        try:
            cursor = self.collection.find({"document_id": document_id})

            jobs = []
            for job_data in cursor:
                jobs.append(ProcessingJob(**job_data))

            return jobs

        except PyMongoError as e:
            raise JobRepositoryError("get_jobs_by_file_id", e)

    def get_jobs_by_status(self, status: ProcessingStatus) -> List[ProcessingJob]:
        """
        Retrieve all jobs with a specific status.

        Args:
            status: The processing status to filter by

        Returns:
            List of ProcessingJob documents

        Raises:
            JobRepositoryError: If database operation fails
        """
        try:
            cursor = self.collection.find({"status": status})

            jobs = []
            for job_data in cursor:
                jobs.append(ProcessingJob(**job_data))

            return jobs

        except PyMongoError as e:
            raise JobRepositoryError("get_jobs_by_status", e)
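A brief sketch of the intended job lifecycle with this repository; db and doc_id are assumed, the status values come from the code above, and job.id assumes the model exposes its _id as id:

repo = JobRepository(db).initialize()

job = repo.create_job(document_id=doc_id, task_id="celery-task-uuid")
repo.update_job_status(job.id, ProcessingStatus.PROCESSING)  # stamps started_at
repo.update_job_status(job.id, ProcessingStatus.COMPLETED)   # stamps completed_at
# On failure instead:
# repo.update_job_status(job.id, ProcessingStatus.FAILED, error_message="parse error")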
@@ -5,10 +5,12 @@ This module implements the repository pattern for user CRUD operations
with dependency injection of the database connection using async/await.
"""

-from typing import Optional, List
from datetime import datetime
+from typing import Optional, List

from bson import ObjectId
-from motor.motor_asyncio import AsyncIOMotorDatabase, AsyncIOMotorCollection
+from pymongo.collection import Collection
+from pymongo.database import Database
from pymongo.errors import DuplicateKeyError, PyMongoError

from app.models.user import UserCreate, UserInDB, UserUpdate
@@ -23,7 +25,7 @@ class UserRepository:
    following the repository pattern with dependency injection and async/await.
    """

-    def __init__(self, database: AsyncIOMotorDatabase):
+    def __init__(self, database: Database):
        """
        Initialize repository with database dependency.

@@ -31,30 +33,30 @@ class UserRepository:
            database (Database): MongoDB database instance
        """
        self.db = database
-        self.collection: AsyncIOMotorCollection = database.users
-        self._ensure_indexes()
+        self.collection: Collection = database.users

-    async def initialize(self):
+    def initialize(self):
        """
        Initialize repository by ensuring required indexes exist.

        Should be called after repository instantiation to setup database indexes.
        """
-        await self._ensure_indexes()
+        self._ensure_indexes()
+        return self

-    async def _ensure_indexes(self):
+    def _ensure_indexes(self):
        """
        Ensure required database indexes exist.

        Creates unique index on username field to prevent duplicates.
        """
        try:
-            await self.collection.create_index("username", unique=True)
+            self.collection.create_index("username", unique=True)
        except PyMongoError:
            # Index might already exist, ignore error
            pass

-    async def create_user(self, user_data: UserCreate) -> UserInDB:
+    def create_user(self, user_data: UserCreate) -> UserInDB:
        """
        Create a new user in the database.

@@ -79,7 +81,7 @@ class UserRepository:
        }

        try:
-            result = await self.collection.insert_one(user_dict)
+            result = self.collection.insert_one(user_dict)
            user_dict["_id"] = result.inserted_id
            return UserInDB(**user_dict)
        except DuplicateKeyError as e:
@@ -87,7 +89,7 @@ class UserRepository:
        except PyMongoError as e:
            raise ValueError(f"Failed to create user: {e}")

-    async def find_user_by_username(self, username: str) -> Optional[UserInDB]:
+    def find_user_by_username(self, username: str) -> Optional[UserInDB]:
        """
        Find user by username.

@@ -98,14 +100,14 @@ class UserRepository:
            UserInDB or None: User if found, None otherwise
        """
        try:
-            user_doc = await self.collection.find_one({"username": username})
+            user_doc = self.collection.find_one({"username": username})
            if user_doc:
                return UserInDB(**user_doc)
            return None
        except PyMongoError:
            return None

-    async def find_user_by_id(self, user_id: str) -> Optional[UserInDB]:
+    def find_user_by_id(self, user_id: str) -> Optional[UserInDB]:
        """
        Find user by ID.

@@ -119,14 +121,14 @@ class UserRepository:
            if not ObjectId.is_valid(user_id):
                return None

-            user_doc = await self.collection.find_one({"_id": ObjectId(user_id)})
+            user_doc = self.collection.find_one({"_id": ObjectId(user_id)})
            if user_doc:
                return UserInDB(**user_doc)
            return None
        except PyMongoError:
            return None

-    async def find_user_by_email(self, email: str) -> Optional[UserInDB]:
+    def find_user_by_email(self, email: str) -> Optional[UserInDB]:
        """
        Find user by email address.

@@ -137,14 +139,14 @@ class UserRepository:
            UserInDB or None: User if found, None otherwise
        """
        try:
-            user_doc = await self.collection.find_one({"email": email})
+            user_doc = self.collection.find_one({"email": email})
            if user_doc:
                return UserInDB(**user_doc)
            return None
        except PyMongoError:
            return None

-    async def update_user(self, user_id: str, user_update: UserUpdate) -> Optional[UserInDB]:
+    def update_user(self, user_id: str, user_update: UserUpdate) -> Optional[UserInDB]:
        """
        Update user information.

@@ -177,9 +179,9 @@ class UserRepository:
        clean_update_data = {k: v for k, v in update_data.items() if v is not None}

        if not clean_update_data:
-            return await self.find_user_by_id(user_id)
+            return self.find_user_by_id(user_id)

-        result = await self.collection.find_one_and_update(
+        result = self.collection.find_one_and_update(
            {"_id": ObjectId(user_id)},
            {"$set": clean_update_data},
            return_document=True
@@ -192,7 +194,7 @@ class UserRepository:
        except PyMongoError:
            return None

-    async def delete_user(self, user_id: str) -> bool:
+    def delete_user(self, user_id: str) -> bool:
        """
        Delete user from database.

@@ -206,12 +208,12 @@ class UserRepository:
            if not ObjectId.is_valid(user_id):
                return False

-            result = await self.collection.delete_one({"_id": ObjectId(user_id)})
+            result = self.collection.delete_one({"_id": ObjectId(user_id)})
            return result.deleted_count > 0
        except PyMongoError:
            return False

-    async def list_users(self, skip: int = 0, limit: int = 100) -> List[UserInDB]:
+    def list_users(self, skip: int = 0, limit: int = 100) -> List[UserInDB]:
        """
        List users with pagination.

@@ -224,12 +226,12 @@ class UserRepository:
        """
        try:
            cursor = self.collection.find({}).skip(skip).limit(limit).sort("created_at", -1)
-            user_docs = await cursor.to_list(length=limit)
+            user_docs = cursor.to_list(length=limit)
            return [UserInDB(**user_doc) for user_doc in user_docs]
        except PyMongoError:
            return []

-    async def count_users(self) -> int:
+    def count_users(self) -> int:
        """
        Count total number of users.

@@ -237,11 +239,11 @@ class UserRepository:
            int: Total number of users in database
        """
        try:
-            return await self.collection.count_documents({})
+            return self.collection.count_documents({})
        except PyMongoError:
            return 0

-    async def user_exists(self, username: str) -> bool:
+    def user_exists(self, username: str) -> bool:
        """
        Check if user exists by username.

@@ -252,7 +254,7 @@ class UserRepository:
            bool: True if user exists, False otherwise
        """
        try:
-            count = await self.collection.count_documents({"username": username})
+            count = self.collection.count_documents({"username": username})
            return count > 0
        except PyMongoError:
            return False
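With the async layer removed, callers now use the repository synchronously; a short sketch, with db assumed to be a pymongo Database:

repo = UserRepository(db).initialize()  # initialize() now returns self for chaining

user = repo.find_user_by_username("admin")
if user is None:
    print("no admin yet;", repo.count_users(), "users total")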
src/file-processor/app/exceptions/__init__.py (new file, 0 lines)
src/file-processor/app/exceptions/job_exceptions.py (new file, 38 lines)
@@ -0,0 +1,38 @@
"""
Custom exceptions for job management operations.

This module defines specific exceptions for job processing lifecycle
and repository operations to provide clear error handling.
"""

from app.models.job import ProcessingStatus


class InvalidStatusTransitionError(Exception):
    """
    Raised when an invalid status transition is attempted.

    This exception indicates that an attempt was made to change a job's
    status to an invalid target status given the current status.
    """

    def __init__(self, current_status: ProcessingStatus, target_status: ProcessingStatus):
        self.current_status = current_status
        self.target_status = target_status
        super().__init__(
            f"Invalid status transition from '{current_status}' to '{target_status}'"
        )


class JobRepositoryError(Exception):
    """
    Raised when a MongoDB operation fails in the job repository.

    This exception wraps database-related errors that occur during
    job repository operations.
    """

    def __init__(self, operation: str, original_error: Exception):
        self.operation = operation
        self.original_error = original_error
        super().__init__(f"Repository operation '{operation}' failed: {str(original_error)}")
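A small sketch of how callers are expected to handle these exceptions; job_repo, doc_id, and logger are assumed:

try:
    job = job_repo.create_job(document_id=doc_id)
except JobRepositoryError as e:
    # The exception carries the failed operation name and the root cause
    logger.error(f"{e.operation} failed: {e.original_error}")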
src/file-processor/app/file_watcher.py (new file, 243 lines)
@@ -0,0 +1,243 @@
"""
File watcher implementation with Watchdog observer and ProcessingJob management.

This module provides real-time file monitoring for document processing.
When a file is created in the watched directory, it:
1. Creates a document record via DocumentService
2. Dispatches a Celery task for processing
3. Creates a ProcessingJob to track the task lifecycle
"""

import logging
import threading
from pathlib import Path
from typing import Optional

from watchdog.events import FileSystemEventHandler, FileCreatedEvent
from watchdog.observers import Observer

from app.services.document_service import DocumentService
from app.services.job_service import JobService

logger = logging.getLogger(__name__)


class DocumentFileEventHandler(FileSystemEventHandler):
    """
    Event handler for document file creation events.

    Processes newly created files by creating document records,
    dispatching Celery tasks, and managing processing jobs.
    """

    SUPPORTED_EXTENSIONS = {'.txt', '.pdf', '.docx'}

    def __init__(self, document_service: DocumentService, job_service: JobService):
        """
        Initialize the event handler.

        Args:
            document_service: Service for document management
            job_service: Service for processing job management
        """
        super().__init__()
        self.document_service = document_service
        self.job_service = job_service

    def on_created(self, event: FileCreatedEvent) -> None:
        """
        Handle file creation events.

        Args:
            event: File system event containing file path information
        """
        if event.is_directory:
            return

        filepath = event.src_path
        file_extension = Path(filepath).suffix.lower()

        if file_extension not in self.SUPPORTED_EXTENSIONS:
            logger.info(f"Ignoring unsupported file type: {filepath}")
            return

        logger.info(f"Processing new file: {filepath}")

        # try:
        from tasks.document_processing import process_document
        task_result = process_document.delay(filepath)
        print(task_result)
        print("hello world")
        # task_id = task_result.task_id
        # logger.info(f"Dispatched Celery task with ID: {task_id}")

        # except Exception as e:
        #     logger.error(f"Failed to process file {filepath}: {str(e)}")
        #     # Note: We don't re-raise the exception to keep the watcher running


class FileWatcher:
    """
    File system watcher for automatic document processing.

    Monitors a directory for new files and triggers the processing pipeline
    using a dedicated observer thread.
    """

    def __init__(
        self,
        watch_directory: str,
        document_service: DocumentService,
        job_service: JobService,
        recursive: bool = True
    ):
        """
        Initialize the file watcher.

        Args:
            watch_directory: Directory path to monitor
            document_service: Service for document management
            job_service: Service for processing job management
            recursive: Whether to watch subdirectories recursively
        """
        self.watch_directory = Path(watch_directory)
        self.recursive = recursive
        self.observer: Optional[Observer] = None
        self._observer_thread: Optional[threading.Thread] = None
        self._stop_event = threading.Event()

        # Validate watch directory
        if not self.watch_directory.exists():
            raise ValueError(f"Watch directory does not exist: {watch_directory}")

        if not self.watch_directory.is_dir():
            raise ValueError(f"Watch path is not a directory: {watch_directory}")

        # Create event handler
        self.event_handler = DocumentFileEventHandler(
            document_service=document_service,
            job_service=job_service
        )

        logger.info(f"FileWatcher initialized for directory: {self.watch_directory}")

    def start(self) -> None:
        """
        Start the file watcher in a separate thread.

        Raises:
            RuntimeError: If the watcher is already running
        """
        if self.is_running():
            raise RuntimeError("FileWatcher is already running")

        self.observer = Observer()
        self.observer.schedule(
            self.event_handler,
            str(self.watch_directory),
            recursive=self.recursive
        )

        # Start observer in separate thread
        self._observer_thread = threading.Thread(
            target=self._run_observer,
            name="FileWatcher-Observer"
        )
        self._stop_event.clear()
        self._observer_thread.start()

        logger.info("FileWatcher started successfully")

    def stop(self, timeout: float = 5.0) -> None:
        """
        Stop the file watcher gracefully.

        Args:
            timeout: Maximum time to wait for graceful shutdown
        """
        if not self.is_running():
            logger.warning("FileWatcher is not running")
            return

        logger.info("Stopping FileWatcher...")

        # Signal stop and wait for observer thread
        self._stop_event.set()

        if self.observer:
            self.observer.stop()

        if self._observer_thread and self._observer_thread.is_alive():
            self._observer_thread.join(timeout=timeout)

            if self._observer_thread.is_alive():
                logger.warning("FileWatcher thread did not stop gracefully within timeout")
            else:
                logger.info("FileWatcher stopped gracefully")

        # Clean up
        self.observer = None
        self._observer_thread = None

    def is_running(self) -> bool:
        """
        Check if the file watcher is currently running.

        Returns:
            True if the watcher is running, False otherwise
        """
        return (
            self.observer is not None
            and self._observer_thread is not None
            and self._observer_thread.is_alive()
        )

    def _run_observer(self) -> None:
        """
        Internal method to run the observer in a separate thread.

        This method should not be called directly.
        """
        if not self.observer:
            logger.error("Observer not initialized")
            return

        try:
            self.observer.start()
            logger.info("Observer thread started")

            # Keep the observer running until stop is requested
            while not self._stop_event.is_set():
                self._stop_event.wait(timeout=1.0)

            logger.info("Observer thread stopping...")

        except Exception as e:
            logger.error(f"Observer thread error: {str(e)}")
        finally:
            if self.observer:
                self.observer.join()
                logger.info("Observer thread stopped")


def create_file_watcher(
    watch_directory: str,
    document_service: DocumentService,
    job_service: JobService
) -> FileWatcher:
    """
    Factory function to create a FileWatcher instance.

    Args:
        watch_directory: Directory path to monitor
        document_service: Service for document management
        job_service: Service for processing job management

    Returns:
        Configured FileWatcher instance
    """
    return FileWatcher(
        watch_directory=watch_directory,
        document_service=document_service,
        job_service=job_service
    )
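A minimal standalone sketch of driving the watcher; the services are assumed to be constructed as in main.py below, where the real wiring happens in the FastAPI lifespan:

import time

watcher = create_file_watcher("/watched_files", document_service, job_service)
watcher.start()
try:
    while True:
        time.sleep(1)  # keep the main thread alive while the observer thread runs
except KeyboardInterrupt:
    watcher.stop()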
@@ -1,203 +1,169 @@
|
||||
"""
|
||||
FastAPI application for MyDocManager file processor service.
|
||||
FastAPI application with integrated FileWatcher for document processing.
|
||||
|
||||
This service provides API endpoints for health checks and task dispatching.
|
||||
This module provides the main FastAPI application with:
|
||||
- JWT authentication
|
||||
- User management APIs
|
||||
- Real-time file monitoring via FileWatcher
|
||||
- Document processing via Celery tasks
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from contextlib import asynccontextmanager
|
||||
from fastapi import FastAPI, HTTPException, Depends
|
||||
from pydantic import BaseModel
|
||||
import redis
|
||||
from celery import Celery
|
||||
from typing import AsyncGenerator
|
||||
|
||||
from app.database.connection import test_database_connection, get_database
|
||||
from app.database.repositories.user_repository import UserRepository
|
||||
from app.models.user import UserCreate
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
from app.api.routes.auth import router as auth_router
|
||||
from app.api.routes.users import router as users_router
|
||||
from app.config import settings
|
||||
from app.database.connection import get_database
|
||||
from app.file_watcher import create_file_watcher, FileWatcher
|
||||
from app.services.document_service import DocumentService
|
||||
from app.services.init_service import InitializationService
|
||||
from app.services.job_service import JobService
|
||||
from app.services.user_service import UserService
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Global file watcher instance
|
||||
file_watcher: FileWatcher = None
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
    """
    Application lifespan manager for startup and shutdown tasks.

    Handles initialization tasks that need to run when the application starts,
    including admin user creation and other setup procedures.
    FastAPI lifespan context manager.

    Handles application startup and shutdown events including:
    - Database connection
    - Default admin user creation
    - FileWatcher startup/shutdown
    """
    # Startup tasks
    global file_watcher

    # Startup
    logger.info("Starting MyDocManager application...")

    try:
        # Initialize database connection
        database = get_database()
        logger.info("Database connection established")

        # Initialize repositories and services
        user_repository = UserRepository(database)
        user_service = UserService(user_repository)
        document_service = DocumentService(database=database, objects_folder=settings.get_objects_folder())
        job_service = JobService(database=database)
        user_service = UserService(database=database)
        logger.info("Service created")

        # Create default admin user
        init_service = InitializationService(user_service)
        init_service.initialize_application()
        logger.info("Default admin user initialization completed")

        # Run initialization tasks
        initialization_result = init_service.initialize_application()
        # Create and start file watcher
        file_watcher = create_file_watcher(
            watch_directory=settings.watch_directory(),
            document_service=document_service,
            job_service=job_service
        )
        file_watcher.start()
        logger.info(f"FileWatcher started for directory: {settings.watch_directory()}")

        if initialization_result["initialization_success"]:
            logger.info("Application startup completed successfully")
            if initialization_result["admin_user_created"]:
                logger.info("Default admin user was created during startup")
        else:
            logger.error("Application startup completed with errors:")
            for error in initialization_result["errors"]:
                logger.error(f" - {error}")
        logger.info("Application startup completed successfully")

        yield

    except Exception as e:
        logger.error(f"Critical error during application startup: {str(e)}")
        # You might want to decide if the app should continue or exit here
        # For now, we log the error but continue
        logger.error(f"Application startup failed: {str(e)}")
        raise

    yield  # Application is running

    # Shutdown tasks (if needed)
    logger.info("Shutting down MyDocManager application...")
    finally:
        # Shutdown
        logger.info("Shutting down MyDocManager application...")

        if file_watcher and file_watcher.is_running():
            file_watcher.stop()
            logger.info("FileWatcher stopped")

        logger.info("Application shutdown completed")

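Stripped of the service wiring, the startup/shutdown shape used here is FastAPI's lifespan pattern. A minimal sketch, with a placeholder standing in for the watcher resource:

    from contextlib import asynccontextmanager
    from fastapi import FastAPI

    @asynccontextmanager
    async def lifespan(app: FastAPI):
        # Startup: acquire long-lived resources before serving requests
        watcher = object()  # stand-in for create_file_watcher(...)
        try:
            yield  # the application serves requests while suspended here
        finally:
            # Shutdown: always release resources, even after a failure
            del watcher

    app = FastAPI(lifespan=lifespan)
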
# Initialize FastAPI app
# Create FastAPI application
app = FastAPI(
    title="MyDocManager File Processor",
    description="File processing and task dispatch service",
    version="1.0.0",
    title="MyDocManager",
    description="Real-time document processing application with authentication",
    version="0.1.0",
    lifespan=lifespan
)

# Environment variables
REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/0")
MONGODB_URL = os.getenv("MONGODB_URL", "mongodb://localhost:27017")

# Initialize Redis client
try:
    redis_client = redis.from_url(REDIS_URL)
except Exception as e:
    redis_client = None
    print(f"Warning: Could not connect to Redis: {e}")

# Initialize Celery
celery_app = Celery(
    "file_processor",
    broker=REDIS_URL,
    backend=REDIS_URL
)

# Configure CORS
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:5173"],  # React frontend
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include routers
app.include_router(auth_router, prefix="/auth", tags=["Authentication"])
app.include_router(users_router, prefix="/users", tags=["User Management"])
# app.include_router(documents_router, prefix="/documents", tags=["Documents"])
# app.include_router(jobs_router, prefix="/jobs", tags=["Processing Jobs"])

# Pydantic models
class TestTaskRequest(BaseModel):
    """Request model for test task."""
    message: str

def get_user_service() -> UserService:
    """
    Dependency to get user service instance.

    This should be properly implemented with database connection management
    in your actual application.
    """
    database = get_database()
    user_repository = UserRepository(database)
    return UserService(user_repository)


# Your API routes would use the service like this:
@app.post("/api/users")
async def create_user(
    user_data: UserCreate,
    user_service: UserService = Depends(get_user_service)
):
    return user_service.create_user(user_data)

@app.get("/health")
|
||||
async def health_check():
|
||||
"""
|
||||
Health check endpoint.
|
||||
|
||||
|
||||
Returns:
|
||||
dict: Service health status with dependencies
|
||||
Dictionary containing application health status
|
||||
"""
|
||||
health_status = {
|
||||
return {
|
||||
"status": "healthy",
|
||||
"service": "file-processor",
|
||||
"dependencies": {
|
||||
"redis": "unknown",
|
||||
"mongodb": "unknown"
|
||||
},
|
||||
"service": "MyDocManager",
|
||||
"version": "1.0.0",
|
||||
"file_watcher_running": file_watcher.is_running() if file_watcher else False
|
||||
}
|
||||
|
||||
# Check Redis connection
|
||||
if redis_client:
|
||||
try:
|
||||
redis_client.ping()
|
||||
health_status["dependencies"]["redis"] = "connected"
|
||||
except Exception:
|
||||
health_status["dependencies"]["redis"] = "disconnected"
|
||||
health_status["status"] = "degraded"
|
||||
|
||||
# check MongoDB connection
|
||||
if test_database_connection():
|
||||
health_status["dependencies"]["mongodb"] = "connected"
|
||||
else:
|
||||
health_status["dependencies"]["mongodb"] = "disconnected"
|
||||
|
||||
return health_status
|
||||
|
||||
|
||||
@app.post("/test-task")
|
||||
async def dispatch_test_task(request: TestTaskRequest):
|
||||
"""
|
||||
Dispatch a test task to Celery worker.
|
||||
|
||||
Args:
|
||||
request: Test task request containing message
|
||||
|
||||
Returns:
|
||||
dict: Task dispatch information
|
||||
|
||||
Raises:
|
||||
HTTPException: If task dispatch fails
|
||||
"""
|
||||
try:
|
||||
# Send task to worker
|
||||
task = celery_app.send_task(
|
||||
"main.test_task",
|
||||
args=[request.message]
|
||||
)
|
||||
|
||||
return {
|
||||
"status": "dispatched",
|
||||
"task_id": task.id,
|
||||
"message": f"Test task dispatched with message: {request.message}"
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Failed to dispatch task: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
@app.get("/")
|
||||
async def root():
|
||||
"""
|
||||
Root endpoint.
|
||||
|
||||
Root endpoint with basic application information.
|
||||
|
||||
Returns:
|
||||
dict: Basic service information
|
||||
Dictionary containing welcome message and available endpoints
|
||||
"""
|
||||
return {
|
||||
"service": "MyDocManager File Processor",
|
||||
"version": "1.0.0",
|
||||
"status": "running"
|
||||
"message": "Welcome to MyDocManager",
|
||||
"description": "Real-time document processing application",
|
||||
"docs": "/docs",
|
||||
"health": "/health"
|
||||
}
|
||||
|
||||
|
||||
@app.get("/watcher/status")
|
||||
async def watcher_status():
|
||||
"""
|
||||
Get file watcher status.
|
||||
|
||||
Returns:
|
||||
Dictionary containing file watcher status information
|
||||
"""
|
||||
if not file_watcher:
|
||||
return {
|
||||
"status": "not_initialized",
|
||||
"running": False
|
||||
}
|
||||
|
||||
return {
|
||||
"status": "initialized",
|
||||
"running": file_watcher.is_running(),
|
||||
"watch_directory": str(file_watcher.watch_directory),
|
||||
"recursive": file_watcher.recursive
|
||||
}
|
||||
|
||||
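
Note that dispatch goes through send_task with a task name string, so the API container never imports the worker code. A standalone sketch of the same call (broker URL mirrors the default above; everything else is illustrative):

    from celery import Celery

    celery_app = Celery("client", broker="redis://localhost:6379/0",
                        backend="redis://localhost:6379/0")

    # Dispatch by name: no import of the worker module is needed,
    # only the registered task name must match.
    result = celery_app.send_task("main.test_task", args=["hello"])
    print(result.id)     # task UUID, as returned to the API caller
    print(result.state)  # PENDING/STARTED/SUCCESS once a worker picks it up
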
@@ -3,12 +3,45 @@ Authentication models and enums for user management.

Contains user roles enumeration and authentication-related Pydantic models.
"""

from datetime import datetime
from enum import Enum

from pydantic import BaseModel, Field

from app.models.types import PyObjectId


class UserRole(str, Enum):
    """User roles enumeration with string values."""

    USER = "user"
    ADMIN = "admin"


class UserResponse(BaseModel):
    """Model for user data in API responses (excludes password_hash)."""

    id: PyObjectId = Field(alias="_id")
    username: str
    email: str
    role: UserRole
    is_active: bool
    created_at: datetime
    updated_at: datetime

    model_config = {
        "populate_by_name": True,
        "arbitrary_types_allowed": True,
    }


class LoginResponse(BaseModel):
    """Response model for successful login."""
    access_token: str
    token_type: str = "bearer"
    user: UserResponse


class MessageResponse(BaseModel):
    """Generic message response."""
    message: str

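The alias plus populate_by_name pairing is what lets these response models be built directly from raw Mongo documents. A simplified, self-contained illustration using a plain str id instead of PyObjectId:

    from pydantic import BaseModel, Field

    class UserOut(BaseModel):
        # Mongo stores the key as "_id"; the alias maps it onto "id"
        id: str = Field(alias="_id")
        username: str
        model_config = {"populate_by_name": True}

    doc = {"_id": "68c9...", "username": "admin"}  # shape of a Mongo document
    user = UserOut.model_validate(doc)             # populated via the alias
    print(user.id, user.username)
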
@@ -33,15 +33,6 @@ class ExtractionMethod(str, Enum):
    HYBRID = "hybrid"


class ProcessingStatus(str, Enum):
    """Status values for processing jobs."""

    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"


class FileDocument(BaseModel):
    """
    Model for file documents stored in the 'files' collection.
@@ -58,6 +49,9 @@ class FileDocument(BaseModel):
    metadata: Dict[str, Any] = Field(default_factory=dict, description="File-specific metadata")
    detected_at: Optional[datetime] = Field(default=None, description="Timestamp when file was detected")
    file_hash: Optional[str] = Field(default=None, description="SHA256 hash of file content")
    encoding: str = Field(default="utf-8", description="Character encoding for text files")
    file_size: int = Field(..., ge=0, description="File size in bytes")
    mime_type: str = Field(..., description="MIME type detected")

    @field_validator('filepath')
    @classmethod
@@ -74,69 +68,3 @@ class FileDocument(BaseModel):
        if not v.strip():
            raise ValueError("Filename cannot be empty")
        return v.strip()

    class Config:
        """Pydantic configuration."""
        populate_by_name = True
        arbitrary_types_allowed = True
        json_encoders = {ObjectId: str}


class DocumentContent(BaseModel):
    """Model for document content."""

    id: Optional[PyObjectId] = Field(default=None, alias="_id")
    file_hash: Optional[str] = Field(default=None, description="SHA256 hash of file content")
    content: str = Field(..., description="File content")
    encoding: str = Field(default="utf-8", description="Character encoding for text files")
    file_size: int = Field(..., ge=0, description="File size in bytes")
    mime_type: str = Field(..., description="MIME type detected")


class ProcessingJob(BaseModel):
    """
    Model for processing jobs stored in the 'processing_jobs' collection.

    Tracks the lifecycle and status of document processing tasks.
    """

    id: Optional[PyObjectId] = Field(default=None, alias="_id")
    file_id: PyObjectId = Field(..., description="Reference to file document")
    status: ProcessingStatus = Field(
        default=ProcessingStatus.PENDING,
        description="Current processing status"
    )
    task_id: Optional[str] = Field(
        default=None,
        description="Celery task UUID"
    )
    created_at: Optional[datetime] = Field(
        default=None,
        description="Timestamp when job was created"
    )
    started_at: Optional[datetime] = Field(
        default=None,
        description="Timestamp when processing started"
    )
    completed_at: Optional[datetime] = Field(
        default=None,
        description="Timestamp when processing completed"
    )
    error_message: Optional[str] = Field(
        default=None,
        description="Error message if processing failed"
    )

    @field_validator('error_message')
    @classmethod
    def validate_error_message(cls, v: Optional[str]) -> Optional[str]:
        """Clean up error message."""
        if v is not None:
            return v.strip() if v.strip() else None
        return v

    class Config:
        """Pydantic configuration."""
        populate_by_name = True
        arbitrary_types_allowed = True
        json_encoders = {ObjectId: str}

@@ -0,0 +1,42 @@
from datetime import datetime
from enum import Enum
from typing import Optional

from bson import ObjectId
from pydantic import BaseModel, Field, field_validator

from app.models.types import PyObjectId


class ProcessingStatus(str, Enum):
    """Status values for processing jobs."""

    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"


class ProcessingJob(BaseModel):
    """
    Model for processing jobs stored in the 'processing_jobs' collection.

    Tracks the lifecycle and status of document processing tasks.
    """

    id: Optional[PyObjectId] = Field(default=None, alias="_id")
    document_id: PyObjectId = Field(..., description="Reference to file document")
    status: ProcessingStatus = Field(default=ProcessingStatus.PENDING, description="Current processing status")
    task_id: Optional[str] = Field(default=None, description="Celery task UUID")
    created_at: Optional[datetime] = Field(default=None, description="Timestamp when job was created")
    started_at: Optional[datetime] = Field(default=None, description="Timestamp when processing started")
    completed_at: Optional[datetime] = Field(default=None, description="Timestamp when processing completed")
    error_message: Optional[str] = Field(default=None, description="Error message if processing failed")

    @field_validator('error_message')
    @classmethod
    def validate_error_message(cls, v: Optional[str]) -> Optional[str]:
        """Clean up error message."""
        if v is not None:
            return v.strip() if v.strip() else None
        return v
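
The error_message validator normalizes whitespace-only messages to None. A simplified standalone check of that behavior, with the Mongo-specific fields dropped:

    from typing import Optional
    from pydantic import BaseModel, field_validator

    class Job(BaseModel):
        error_message: Optional[str] = None

        @field_validator("error_message")
        @classmethod
        def clean(cls, v: Optional[str]) -> Optional[str]:
            # Strip padding; collapse whitespace-only messages to None
            if v is not None:
                return v.strip() if v.strip() else None
            return v

    print(Job(error_message="  boom  ").error_message)  # "boom"
    print(Job(error_message="   ").error_message)       # None
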
@@ -7,10 +7,10 @@ and API responses with proper validation and type safety.

import re
from datetime import datetime
from typing import Optional, Any
from typing import Optional

from bson import ObjectId
from pydantic import BaseModel, Field, field_validator, EmailStr
from pydantic_core import core_schema

from app.models.auth import UserRole
from app.models.types import PyObjectId
@@ -138,21 +138,3 @@ class UserInDB(BaseModel):
        "arbitrary_types_allowed": True,
        "json_encoders": {ObjectId: str}
    }


class UserResponse(BaseModel):
    """Model for user data in API responses (excludes password_hash)."""

    id: PyObjectId = Field(alias="_id")
    username: str
    email: str
    role: UserRole
    is_active: bool
    created_at: datetime
    updated_at: datetime

    model_config = {
        "populate_by_name": True,
        "arbitrary_types_allowed": True,
        "json_encoders": {ObjectId: str}
    }

@@ -4,7 +4,11 @@ Authentication service for password hashing and verification.
This module provides authentication-related functionality including
password hashing, verification, and JWT token management.
"""
from datetime import datetime, timedelta, timezone

import jwt

from app.config import settings
from app.utils.security import hash_password, verify_password


@@ -55,4 +59,26 @@ class AuthService:
        >>> auth.verify_user_password("wrongpassword", hashed)
        False
        """
        return verify_password(password, hashed_password)

    @staticmethod
    def create_access_token(data: dict) -> str:
        """
        Create a JWT access token.

        Args:
            data (dict): Payload data to include in the token.

        Returns:
            str: Encoded JWT token.
        """
        # Copy data to avoid modifying the original dict
        to_encode = data.copy()

        # Add expiration time (timezone-aware UTC, so "exp" is interpreted correctly)
        expire = datetime.now(timezone.utc) + timedelta(hours=settings.get_jwt_expire_hours())
        to_encode.update({"exp": expire})

        # Encode JWT
        encoded_jwt = jwt.encode(to_encode, settings.get_jwt_secret_key(), algorithm=settings.get_jwt_algorithm())
        return encoded_jwt
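
For reference, the encode/decode round trip such a token goes through with PyJWT, as a standalone sketch (the secret and claims are illustrative):

    import datetime
    import jwt  # PyJWT

    SECRET = "change-me"  # illustrative only

    claims = {"sub": "admin",
              "exp": datetime.datetime.now(datetime.timezone.utc)
                     + datetime.timedelta(hours=1)}
    token = jwt.encode(claims, SECRET, algorithm="HS256")

    # Decoding verifies the signature and the "exp" claim in one step;
    # an expired or tampered token raises jwt.InvalidTokenError.
    payload = jwt.decode(token, SECRET, algorithms=["HS256"])
    print(payload["sub"])  # "admin"
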
@@ -6,22 +6,19 @@ while maintaining data consistency through MongoDB transactions.
"""

import hashlib
import magic
import os
from datetime import datetime
from pathlib import Path
from typing import List, Optional, Dict, Any, Tuple
from typing import List, Optional, Dict, Any

from motor.motor_asyncio import AsyncIOMotorClientSession
import magic
from pymongo.errors import PyMongoError

from app.database.connection import get_database
from app.config.settings import get_objects_folder
from app.database.repositories.document_repository import FileDocumentRepository
from app.database.repositories.document_content_repository import DocumentContentRepository
from app.models.document import (
    FileDocument,
    DocumentContent,
    FileType,
    ProcessingStatus
)
from app.models.types import PyObjectId

@@ -34,13 +31,25 @@ class DocumentService:
    and their content while ensuring data consistency through transactions.
    """

    def __init__(self):
        """Initialize the document service with repository dependencies."""
        self.db = get_database()
        self.file_repository = FileDocumentRepository(self.db)
        self.content_repository = DocumentContentRepository(self.db)
    def __init__(self, database, objects_folder: str = None):
        """
        Initialize the document service with repository dependencies.

        Args:
            database: Database instance
            objects_folder: Folder where file contents are stored under their hash
        """
        self.db = database
        self.document_repository = FileDocumentRepository(self.db)
        self.objects_folder = objects_folder or get_objects_folder()

    def _calculate_file_hash(self, file_bytes: bytes) -> str:
    def initialize(self):
        self.document_repository.initialize()
        return self

    @staticmethod
    def _calculate_file_hash(file_bytes: bytes) -> str:
        """
        Calculate SHA256 hash of file content.

@@ -52,7 +61,8 @@ class DocumentService:
        """
        return hashlib.sha256(file_bytes).hexdigest()

    def _detect_file_type(self, file_path: str) -> FileType:
    @staticmethod
    def _detect_file_type(file_path: str) -> FileType:
        """
        Detect file type from file extension.

@@ -72,7 +82,8 @@ class DocumentService:
        except ValueError:
            raise ValueError(f"Unsupported file type: {extension}")

    def _detect_mime_type(self, file_bytes: bytes) -> str:
    @staticmethod
    def _detect_mime_type(file_bytes: bytes) -> str:
        """
        Detect MIME type from file content.

@@ -84,10 +95,51 @@ class DocumentService:
        """
        return magic.from_buffer(file_bytes, mime=True)

    async def create_document(
    @staticmethod
    def _read_file_bytes(file_path: str | Path) -> bytes:
        """
        Read file content as bytes.

        Args:
            file_path (str | Path): Path of the file to read

        Returns:
            bytes: Content of the file

        Raises:
            FileNotFoundError: If the file does not exist
            OSError: If any I/O error occurs
        """
        path = Path(file_path)

        if not path.exists():
            raise FileNotFoundError(f"File not found: {file_path}")

        return path.read_bytes()

    def _get_document_path(self, file_hash):
        """
        Build the on-disk storage path for a file hash.

        :param file_hash: SHA256 hash of the file content
        :return: Path inside objects_folder, sharded by the first 24 hex characters of the hash
        """
        return os.path.join(self.objects_folder, file_hash[:24], file_hash)

    def save_content_if_needed(self, file_hash, content: bytes):
        target_path = self._get_document_path(file_hash)
        if os.path.exists(target_path):
            return

        if not os.path.exists(os.path.dirname(target_path)):
            os.makedirs(os.path.dirname(target_path))

        with open(target_path, "wb") as f:
            f.write(content)

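save_content_if_needed implements a small content-addressed object store: the hash both names the blob and deduplicates it, and the 24-character prefix shards the directory tree. A self-contained sketch of the same layout (the objects folder path is illustrative):

    import hashlib
    import os

    OBJECTS = "/tmp/objects"  # illustrative objects folder

    def store(content: bytes) -> str:
        file_hash = hashlib.sha256(content).hexdigest()
        # Shard by hash prefix so one directory never holds every object
        path = os.path.join(OBJECTS, file_hash[:24], file_hash)
        if not os.path.exists(path):  # identical content is written once
            os.makedirs(os.path.dirname(path), exist_ok=True)
            with open(path, "wb") as f:
                f.write(content)
        return file_hash

    h = store(b"hello")
    print(h, store(b"hello") == h)  # second call is a no-op, same hash
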
    def create_document(
            self,
            file_path: str,
            file_bytes: bytes,
            file_bytes: bytes | None = None,
            encoding: str = "utf-8"
    ) -> FileDocument:
        """
@@ -110,57 +162,40 @@ class DocumentService:
            PyMongoError: If database operation fails
        """
        # Calculate automatic attributes
        file_bytes = file_bytes if file_bytes is not None else self._read_file_bytes(file_path)
        file_hash = self._calculate_file_hash(file_bytes)
        file_type = self._detect_file_type(file_path)
        mime_type = self._detect_mime_type(file_bytes)
        file_size = len(file_bytes)
        filename = Path(file_path).name
        detected_at = datetime.utcnow()
        detected_at = datetime.now()

        # Start MongoDB transaction
        async with await self.db.client.start_session() as session:
            async with session.start_transaction():
                try:
                    # Check if content already exists
                    existing_content = await self.content_repository.find_document_content_by_file_hash(
                        file_hash, session=session
                    )

                    # Create DocumentContent if it doesn't exist
                    if not existing_content:
                        content_data = DocumentContent(
                            file_hash=file_hash,
                            content="",  # Will be populated by processing workers
                            encoding=encoding,
                            file_size=file_size,
                            mime_type=mime_type
                        )
                        await self.content_repository.create_document_content(
                            content_data, session=session
                        )

                    # Create FileDocument
                    file_data = FileDocument(
                        filename=filename,
                        filepath=file_path,
                        file_type=file_type,
                        extraction_method=None,  # Will be set by processing workers
                        metadata={},  # Empty for now
                        detected_at=detected_at,
                        file_hash=file_hash
                    )

                    created_file = await self.file_repository.create_document(
                        file_data, session=session
                    )

                    return created_file

                except Exception as e:
                    # Transaction will automatically rollback
                    raise PyMongoError(f"Failed to create document: {str(e)}")
        try:
            self.save_content_if_needed(file_hash, file_bytes)

            # Create FileDocument
            file_data = FileDocument(
                filename=filename,
                filepath=file_path,
                file_type=file_type,
                extraction_method=None,  # Will be set by processing workers
                metadata={},  # Empty for now
                detected_at=detected_at,
                file_hash=file_hash,
                encoding=encoding,
                file_size=file_size,
                mime_type=mime_type
            )

            created_file = self.document_repository.create_document(file_data)

            return created_file

        except Exception as e:
            # Transaction will automatically rollback if supported
            raise PyMongoError(f"Failed to create document: {str(e)}")
    async def get_document_by_id(self, document_id: PyObjectId) -> Optional[FileDocument]:
    def get_document_by_id(self, document_id: PyObjectId) -> Optional[FileDocument]:
        """
        Retrieve a document by its ID.

@@ -170,9 +205,9 @@ class DocumentService:
        Returns:
            FileDocument if found, None otherwise
        """
        return await self.file_repository.find_document_by_id(document_id)
        return self.document_repository.find_document_by_id(str(document_id))

    async def get_document_by_hash(self, file_hash: str) -> Optional[FileDocument]:
    def get_document_by_hash(self, file_hash: str) -> Optional[FileDocument]:
        """
        Retrieve a document by its file hash.

@@ -182,9 +217,9 @@ class DocumentService:
        Returns:
            FileDocument if found, None otherwise
        """
        return await self.file_repository.find_document_by_hash(file_hash)
        return self.document_repository.find_document_by_hash(file_hash)

    async def get_document_by_filepath(self, filepath: str) -> Optional[FileDocument]:
    def get_document_by_filepath(self, filepath: str) -> Optional[FileDocument]:
        """
        Retrieve a document by its file path.

@@ -194,34 +229,17 @@ class DocumentService:
        Returns:
            FileDocument if found, None otherwise
        """
        return await self.file_repository.find_document_by_filepath(filepath)
        return self.document_repository.find_document_by_filepath(filepath)

    async def get_document_with_content(
        self,
        document_id: PyObjectId
    ) -> Optional[Tuple[FileDocument, DocumentContent]]:
        """
        Retrieve a document with its associated content.

        Args:
            document_id: Document ObjectId

        Returns:
            Tuple of (FileDocument, DocumentContent) if found, None otherwise
        """
        document = await self.get_document_by_id(document_id)
        if not document:
    def get_document_content_by_hash(self, file_hash):
        target_path = self._get_document_path(file_hash)
        if not os.path.exists(target_path):
            return None

        content = await self.content_repository.find_document_content_by_file_hash(
            document.file_hash
        )
        if not content:
            return None

        return (document, content)
        with open(target_path, "rb") as f:
            return f.read()

    async def list_documents(
    def list_documents(
        self,
        skip: int = 0,
        limit: int = 100
@@ -236,18 +254,18 @@ class DocumentService:
        Returns:
            List of FileDocument instances
        """
        return await self.file_repository.list_documents(skip=skip, limit=limit)
        return self.document_repository.list_documents(skip=skip, limit=limit)

    async def count_documents(self) -> int:
    def count_documents(self) -> int:
        """
        Get total number of documents.

        Returns:
            Total document count
        """
        return await self.file_repository.count_documents()
        return self.document_repository.count_documents()

    async def update_document(
    def update_document(
        self,
        document_id: PyObjectId,
        update_data: Dict[str, Any]
@@ -262,9 +280,14 @@ class DocumentService:
        Returns:
            Updated FileDocument if found, None otherwise
        """
        return await self.file_repository.update_document(document_id, update_data)
        if "file_bytes" in update_data:
            file_hash = self._calculate_file_hash(update_data["file_bytes"])
            update_data["file_hash"] = file_hash
            self.save_content_if_needed(file_hash, update_data["file_bytes"])

        return self.document_repository.update_document(document_id, update_data)
    async def delete_document(self, document_id: PyObjectId) -> bool:
    def delete_document(self, document_id: PyObjectId) -> bool:
        """
        Delete a document and its orphaned content.

@@ -281,100 +304,31 @@ class DocumentService:
        Raises:
            PyMongoError: If database operation fails
        """
        # Start MongoDB transaction
        async with await self.db.client.start_session() as session:
            async with session.start_transaction():
        # Start transaction

        try:
            # Get document to find its hash
            document = self.document_repository.find_document_by_id(document_id)
            if not document:
                return False

            # Delete the document
            deleted = self.document_repository.delete_document(document_id)
            if not deleted:
                return False

            # Check if content is orphaned
            remaining_files = self.document_repository.find_document_by_hash(document.file_hash)

            # If no other files reference this content, delete it
            if not remaining_files:
                try:
                    # Get document to find its hash
                    document = await self.file_repository.find_document_by_id(
                        document_id, session=session
                    )
                    if not document:
                        return False

                    # Delete the document
                    deleted = await self.file_repository.delete_document(
                        document_id, session=session
                    )
                    if not deleted:
                        return False

                    # Check if content is orphaned
                    remaining_files = await self.file_repository.find_document_by_hash(
                        document.file_hash, session=session
                    )

                    # If no other files reference this content, delete it
                    if not remaining_files:
                        content = await self.content_repository.find_document_content_by_file_hash(
                            document.file_hash, session=session
                        )
                        if content:
                            await self.content_repository.delete_document_content(
                                content.id, session=session
                            )

                    return True

                except Exception as e:
                    # Transaction will automatically rollback
                    raise PyMongoError(f"Failed to delete document: {str(e)}")

    async def content_exists(self, file_hash: str) -> bool:
        """
        Check if content with given hash exists.

        Args:
            file_hash: SHA256 hash of file content

        Returns:
            True if content exists, False otherwise
        """
        return await self.content_repository.content_exists(file_hash)

    async def get_content_by_hash(self, file_hash: str) -> Optional[DocumentContent]:
        """
        Retrieve content by file hash.

        Args:
            file_hash: SHA256 hash of file content

        Returns:
            DocumentContent if found, None otherwise
        """
        return await self.content_repository.find_document_content_by_file_hash(file_hash)

    async def update_document_content(
        self,
        file_hash: str,
        content: str,
        encoding: str = "utf-8"
    ) -> Optional[DocumentContent]:
        """
        Update the extracted content for a document.

        This method is typically called by processing workers to store
        the extracted text content.

        Args:
            file_hash: SHA256 hash of file content
            content: Extracted text content
            encoding: Character encoding

        Returns:
            Updated DocumentContent if found, None otherwise
        """
        existing_content = await self.content_repository.find_document_content_by_file_hash(
            file_hash
        )
        if not existing_content:
            return None
                    os.remove(self._get_document_path(document.file_hash))
                except Exception:
                    pass

            return True

        update_data = {
            "content": content,
            "encoding": encoding
        }

        return await self.content_repository.update_document_content(
            existing_content.id, update_data
        )
        except Exception as e:
            # Transaction will automatically rollback if supported
            raise PyMongoError(f"Failed to delete document: {str(e)}")
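
The interleaved old and new lines above are hard to read; distilled, the replacement synchronous delete flow appears to be roughly the following (a sketch inferred from the added lines, with the repository and objects folder injected as parameters, not a verbatim excerpt):

    import os
    from pymongo.errors import PyMongoError

    def delete_document(repo, objects_folder: str, document_id) -> bool:
        """Distilled view of the new synchronous delete flow (inferred)."""
        try:
            document = repo.find_document_by_id(document_id)
            if not document:
                return False
            if not repo.delete_document(document_id):
                return False
            # Blob cleanup: only when no other file record shares the hash
            if not repo.find_document_by_hash(document.file_hash):
                try:
                    os.remove(os.path.join(objects_folder,
                                           document.file_hash[:24],
                                           document.file_hash))
                except Exception:
                    pass  # a missing blob should not fail the delete
            return True
        except Exception as e:
            raise PyMongoError(f"Failed to delete document: {str(e)}")
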
@@ -8,8 +8,8 @@ creating default admin user if none exists.
import logging
from typing import Optional

from app.models.user import UserCreate, UserInDB, UserCreateNoValidation
from app.models.auth import UserRole
from app.models.user import UserInDB, UserCreateNoValidation
from app.services.user_service import UserService

logger = logging.getLogger(__name__)
@@ -31,7 +31,6 @@ class InitializationService:
            user_service (UserService): Service for user operations
        """
        self.user_service = user_service


    def ensure_admin_user_exists(self) -> Optional[UserInDB]:
        """
@@ -131,4 +130,23 @@ class InitializationService:
                logger.error(error_msg)
                initialization_summary["errors"].append(error_msg)

        return initialization_summary
        self.log_initialization_result(initialization_summary)

        return initialization_summary

    @staticmethod
    def log_initialization_result(summary: dict) -> None:
        """
        Log the result of the initialization process.

        Args:
            summary (dict): Summary of initialization tasks performed
        """
        if summary["initialization_success"]:
            logger.info("Application startup completed successfully")
            if summary["admin_user_created"]:
                logger.info("Default admin user was created during startup")
        else:
            logger.error("Application startup completed with errors:")
            for error in summary["errors"]:
                logger.error(f" - {error}")

182
src/file-processor/app/services/job_service.py
Normal file
@@ -0,0 +1,182 @@
"""
|
||||
Service layer for job processing business logic.
|
||||
|
||||
This module provides high-level operations for managing processing jobs
|
||||
with strict status transition validation and business rules enforcement.
|
||||
"""
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from app.database.repositories.job_repository import JobRepository
|
||||
from app.exceptions.job_exceptions import InvalidStatusTransitionError
|
||||
from app.models.job import ProcessingJob, ProcessingStatus
|
||||
from app.models.types import PyObjectId
|
||||
|
||||
|
||||
class JobService:
|
||||
"""
|
||||
Service for processing job business logic operations.
|
||||
|
||||
Provides high-level job management with strict status transition
|
||||
validation and business rule enforcement.
|
||||
"""
|
||||
|
||||
def __init__(self, database):
|
||||
"""
|
||||
Initialize service with job repository.
|
||||
|
||||
Args:
|
||||
repository: Optional JobRepository instance (creates default if None)
|
||||
"""
|
||||
self.db = database
|
||||
self.repository = JobRepository(database)
|
||||
|
||||
def initialize(self):
|
||||
self.repository.initialize()
|
||||
return self
|
||||
|
||||
def create_job(self, document_id: PyObjectId, task_id: Optional[str] = None) -> ProcessingJob:
|
||||
"""
|
||||
Create a new processing job.
|
||||
|
||||
Args:
|
||||
document_id: Reference to the file document
|
||||
task_id: Optional Celery task UUID
|
||||
|
||||
Returns:
|
||||
The created ProcessingJob
|
||||
|
||||
Raises:
|
||||
JobRepositoryError: If database operation fails
|
||||
"""
|
||||
return self.repository.create_job(document_id, task_id)
|
||||
|
||||
def get_job_by_id(self, job_id: PyObjectId) -> ProcessingJob:
|
||||
"""
|
||||
Retrieve a job by its ID.
|
||||
|
||||
Args:
|
||||
job_id: The job ObjectId
|
||||
|
||||
Returns:
|
||||
The ProcessingJob document
|
||||
|
||||
Raises:
|
||||
JobNotFoundError: If job doesn't exist
|
||||
JobRepositoryError: If database operation fails
|
||||
"""
|
||||
return self.repository.find_job_by_id(job_id)
|
||||
|
||||
def mark_job_as_started(self, job_id: PyObjectId) -> ProcessingJob:
|
||||
"""
|
||||
Mark a job as started (PENDING → PROCESSING).
|
||||
|
||||
Args:
|
||||
job_id: The job ObjectId
|
||||
|
||||
Returns:
|
||||
The updated ProcessingJob
|
||||
|
||||
Raises:
|
||||
JobNotFoundError: If job doesn't exist
|
||||
InvalidStatusTransitionError: If job is not in PENDING status
|
||||
JobRepositoryError: If database operation fails
|
||||
"""
|
||||
# Get current job to validate transition
|
||||
current_job = self.repository.find_job_by_id(job_id)
|
||||
|
||||
# Validate status transition
|
||||
if current_job.status != ProcessingStatus.PENDING:
|
||||
raise InvalidStatusTransitionError(current_job.status, ProcessingStatus.PROCESSING)
|
||||
|
||||
# Update status
|
||||
return self.repository.update_job_status(job_id, ProcessingStatus.PROCESSING)
|
||||
|
||||
def mark_job_as_completed(self, job_id: PyObjectId) -> ProcessingJob:
|
||||
"""
|
||||
Mark a job as completed (PROCESSING → COMPLETED).
|
||||
|
||||
Args:
|
||||
job_id: The job ObjectId
|
||||
|
||||
Returns:
|
||||
The updated ProcessingJob
|
||||
|
||||
Raises:
|
||||
JobNotFoundError: If job doesn't exist
|
||||
InvalidStatusTransitionError: If job is not in PROCESSING status
|
||||
JobRepositoryError: If database operation fails
|
||||
"""
|
||||
# Get current job to validate transition
|
||||
current_job = self.repository.find_job_by_id(job_id)
|
||||
|
||||
# Validate status transition
|
||||
if current_job.status != ProcessingStatus.PROCESSING:
|
||||
raise InvalidStatusTransitionError(current_job.status, ProcessingStatus.COMPLETED)
|
||||
|
||||
# Update status
|
||||
return self.repository.update_job_status(job_id, ProcessingStatus.COMPLETED)
|
||||
|
||||
def mark_job_as_failed(
|
||||
self,
|
||||
job_id: PyObjectId,
|
||||
error_message: Optional[str] = None
|
||||
) -> ProcessingJob:
|
||||
"""
|
||||
Mark a job as failed (PROCESSING → FAILED).
|
||||
|
||||
Args:
|
||||
job_id: The job ObjectId
|
||||
error_message: Optional error description
|
||||
|
||||
Returns:
|
||||
The updated ProcessingJob
|
||||
|
||||
Raises:
|
||||
JobNotFoundError: If job doesn't exist
|
||||
InvalidStatusTransitionError: If job is not in PROCESSING status
|
||||
JobRepositoryError: If database operation fails
|
||||
"""
|
||||
# Get current job to validate transition
|
||||
current_job = self.repository.find_job_by_id(job_id)
|
||||
|
||||
# Validate status transition
|
||||
if current_job.status != ProcessingStatus.PROCESSING:
|
||||
raise InvalidStatusTransitionError(current_job.status, ProcessingStatus.FAILED)
|
||||
|
||||
# Update status with error message
|
||||
return self.repository.update_job_status(
|
||||
job_id,
|
||||
ProcessingStatus.FAILED,
|
||||
error_message
|
||||
)
|
||||
|
||||
def delete_job(self, job_id: PyObjectId) -> bool:
|
||||
"""
|
||||
Delete a job from the database.
|
||||
|
||||
Args:
|
||||
job_id: The job ObjectId
|
||||
|
||||
Returns:
|
||||
True if job was deleted, False if not found
|
||||
|
||||
Raises:
|
||||
JobRepositoryError: If database operation fails
|
||||
"""
|
||||
return self.repository.delete_job(job_id)
|
||||
|
||||
def get_jobs_by_status(self, status: ProcessingStatus) -> list[ProcessingJob]:
|
||||
"""
|
||||
Retrieve all jobs with a specific status.
|
||||
|
||||
Args:
|
||||
status: The processing status to filter by
|
||||
|
||||
Returns:
|
||||
List of ProcessingJob documents
|
||||
|
||||
Raises:
|
||||
JobRepositoryError: If database operation fails
|
||||
"""
|
||||
return self.repository.get_jobs_by_status(status)
|
||||
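
Each mark_* method hard-codes its single legal predecessor state. The same rules can also be kept in one transition table, a common alternative shape for such guards (a sketch, not code from this commit):

    from enum import Enum

    class Status(str, Enum):
        PENDING = "pending"
        PROCESSING = "processing"
        COMPLETED = "completed"
        FAILED = "failed"

    # target state -> the only state allowed to precede it
    ALLOWED = {
        Status.PROCESSING: Status.PENDING,
        Status.COMPLETED: Status.PROCESSING,
        Status.FAILED: Status.PROCESSING,
    }

    def check_transition(current: Status, target: Status) -> None:
        if ALLOWED.get(target) != current:
            raise ValueError(f"illegal transition {current} -> {target}")

    check_transition(Status.PENDING, Status.PROCESSING)   # ok
    # check_transition(Status.PENDING, Status.COMPLETED)  # raises ValueError
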
@@ -6,11 +6,11 @@ retrieval, updates, and authentication operations with proper error handling.
"""

from typing import Optional, List

from pymongo.errors import DuplicateKeyError

from app.models.user import UserCreate, UserInDB, UserUpdate, UserResponse, UserCreateNoValidation
from app.models.auth import UserRole
from app.database.repositories.user_repository import UserRepository
from app.models.user import UserCreate, UserInDB, UserUpdate, UserCreateNoValidation
from app.services.auth_service import AuthService


@@ -22,16 +22,21 @@ class UserService:
    authentication, and data management with proper validation.
    """

    def __init__(self, user_repository: UserRepository):
    def __init__(self, database):
        """
        Initialize user service with repository dependency.

        Args:
            database: Database instance used to build the UserRepository
        """
        self.user_repository = user_repository
        self.db = database
        self.user_repository = UserRepository(self.db)
        self.auth_service = AuthService()

    def initialize(self):
        self.user_repository.initialize()
        return self

    def create_user(self, user_data: UserCreate | UserCreateNoValidation) -> UserInDB:
        """
        Create a new user with business logic validation.

@@ -1,11 +1,14 @@
asgiref==3.9.1
bcrypt==4.3.0
celery==5.5.3
email-validator==2.3.0
fastapi==0.116.1
httptools==0.6.4
motor==3.7.1
pymongo==4.15.0
pydantic==2.11.9
PyJWT==2.10.1
redis==6.4.0
uvicorn==0.35.0
python-magic==0.4.27
watchdog==6.0.0
41
src/frontend/.dockerignore
Normal file
@@ -0,0 +1,41 @@
# Dependencies
node_modules
npm-debug.log*
yarn-debug.log*
yarn-error.log*

# Build outputs
dist
build

# Environment files
.env.local
.env.development.local
.env.test.local
.env.production.local

# IDE files
.vscode
.idea
*.swp
*.swo

# OS generated files
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db

# Git
.git
.gitignore

# Docker
Dockerfile
.dockerignore

# Logs
*.log
20
src/frontend/Dockerfile
Normal file
@@ -0,0 +1,20 @@
# Use Node.js 20 Alpine for lightweight container
FROM node:20-alpine

# Set working directory
WORKDIR /app

# Copy package.json and package-lock.json (if available)
COPY package*.json ./

# Install dependencies
RUN npm install

# Copy source code
COPY . .

# Expose Vite default port
EXPOSE 5173

# Start development server with host 0.0.0.0 to accept external connections
CMD ["npm", "run", "dev", "--", "--host", "0.0.0.0", "--port", "5173"]
@@ -3,12 +3,18 @@ FROM python:3.12-slim
# Set working directory
WORKDIR /app

# Install libmagic
RUN apt-get update && apt-get install -y --no-install-recommends \
    libmagic1 \
    file \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements and install dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY tasks/ .
COPY . .

# Command will be overridden by docker-compose
CMD ["celery", "-A", "main", "worker", "--loglevel=info"]

@@ -1,4 +1,13 @@
asgiref==3.9.1
bcrypt==4.3.0
celery==5.5.3
email-validator==2.3.0
fastapi==0.116.1
httptools==0.6.4
motor==3.7.1
pymongo==4.15.0
pydantic==2.11.9
redis==6.4.0
uvicorn==0.35.0
python-magic==0.4.27
watchdog==6.0.0
85
src/worker/tasks/document_processing.py
Normal file
@@ -0,0 +1,85 @@
"""
|
||||
Celery tasks for document processing with ProcessingJob status management.
|
||||
|
||||
This module contains Celery tasks that handle document content extraction
|
||||
and update processing job statuses throughout the task lifecycle.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict
|
||||
|
||||
from app.config import settings
|
||||
from app.database.connection import get_database
|
||||
from app.services.document_service import DocumentService
|
||||
from tasks.main import celery_app
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@celery_app.task(bind=True, autoretry_for=(Exception,), retry_kwargs={'max_retries': 3, 'countdown': 60})
|
||||
def process_document(self, filepath: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Process a document file and extract its content.
|
||||
|
||||
This task:
|
||||
1. Updates the processing job status to PROCESSING
|
||||
2. Performs document content extraction
|
||||
3. Updates job status to COMPLETED or FAILED based on result
|
||||
|
||||
Args:
|
||||
self : Celery task instance
|
||||
filepath: Full path to the document file to process
|
||||
|
||||
Returns:
|
||||
Dictionary containing processing results
|
||||
|
||||
Raises:
|
||||
Exception: Any processing error (will trigger retry)
|
||||
"""
|
||||
task_id = self.request.id
|
||||
logger.info(f"Starting document processing task {task_id} for file: {filepath}")
|
||||
|
||||
database = get_database()
|
||||
document_service = DocumentService(database=database, objects_folder=settings.get_objects_folder())
|
||||
from app.services.job_service import JobService
|
||||
job_service = JobService(database=database)
|
||||
|
||||
job = None
|
||||
try:
|
||||
# Step 1: Insert the document in DB
|
||||
document = document_service.create_document(filepath)
|
||||
logger.info(f"Job {task_id} created for document {document.id} with file path: {filepath}")
|
||||
|
||||
# Step 2: Create a new job record for the document
|
||||
job = job_service.create_job(task_id=task_id, document_id=document.id)
|
||||
|
||||
# Step 3: Mark job as started
|
||||
job_service.mark_job_as_started(job_id=job.id)
|
||||
logger.info(f"Job {task_id} marked as PROCESSING")
|
||||
|
||||
# Step 4: Mark job as completed
|
||||
job_service.mark_job_as_completed(job_id=job.id)
|
||||
logger.info(f"Job {task_id} marked as COMPLETED")
|
||||
|
||||
return {
|
||||
"task_id": task_id,
|
||||
"filepath": filepath,
|
||||
"status": "completed",
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
error_message = f"Document processing failed: {str(e)}"
|
||||
logger.error(f"Task {task_id} failed: {error_message}")
|
||||
|
||||
try:
|
||||
# Mark job as failed
|
||||
if job is not None:
|
||||
job_service.mark_job_as_failed(job_id=job.id, error_message=error_message)
|
||||
logger.info(f"Job {task_id} marked as FAILED")
|
||||
else:
|
||||
logger.error(f"Failed to process {filepath}. error = {str(e)}")
|
||||
except Exception as job_error:
|
||||
logger.error(f"Failed to update job status for task {task_id}: {str(job_error)}")
|
||||
|
||||
# Re-raise the exception to trigger Celery retry mechanism
|
||||
raise
|
||||
|
||||
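
With autoretry_for=(Exception,), the bare raise at the end hands the failure back to Celery, which re-queues the task. A minimal standalone task with the same retry configuration (broker URL is illustrative):

    from celery import Celery

    app = Celery("demo", broker="redis://localhost:6379/0")

    @app.task(bind=True,
              autoretry_for=(Exception,),
              retry_kwargs={"max_retries": 3, "countdown": 60})
    def flaky(self):
        # Any exception raised here is caught by Celery and the task is
        # re-queued up to 3 times, 60 seconds apart, before failing for good.
        raise RuntimeError("transient failure")
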
@@ -3,9 +3,8 @@ Celery worker for MyDocManager document processing tasks.

This module contains all Celery tasks for processing documents.
"""

import os
import time

from celery import Celery

# Environment variables
@@ -13,101 +12,25 @@ REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/0")
MONGODB_URL = os.getenv("MONGODB_URL", "mongodb://localhost:27017")

# Initialize Celery app
app = Celery(
celery_app = Celery(
    "mydocmanager_worker",
    broker=REDIS_URL,
    backend=REDIS_URL
    backend=REDIS_URL,
)

celery_app.autodiscover_tasks(["tasks.document_processing"])

# Celery configuration
app.conf.update(
celery_app.conf.update(
    task_serializer="json",
    accept_content=["json"],
    result_serializer="json",
    timezone="UTC",
    enable_utc=True,
    task_track_started=True,
    task_time_limit=300,  # 5 minutes
    task_soft_time_limit=240,  # 4 minutes
)


@app.task(bind=True)
def test_task(self, message: str):
    """
    Test task for validating worker functionality.

    Args:
        message: Test message to process

    Returns:
        dict: Task result with processing information
    """
    try:
        print(f"[WORKER] Starting test task with message: {message}")

        # Simulate some work
        for i in range(5):
            print(f"[WORKER] Processing step {i + 1}/5...")
            time.sleep(1)

            # Update task progress
            self.update_state(
                state="PROGRESS",
                meta={
                    "current": i + 1,
                    "total": 5,
                    "message": f"Processing step {i + 1}"
                }
            )

        result = {
            "status": "completed",
            "message": f"Successfully processed: {message}",
            "processed_at": time.time(),
            "worker_id": self.request.id
        }

        print(f"[WORKER] Test task completed successfully: {result}")
        return result

    except Exception as exc:
        print(f"[WORKER] Test task failed: {str(exc)}")
        raise self.retry(exc=exc, countdown=60, max_retries=3)


@app.task(bind=True)
def process_document_task(self, file_path: str):
    """
    Placeholder task for document processing.

    Args:
        file_path: Path to the document to process

    Returns:
        dict: Processing result
    """
    try:
        print(f"[WORKER] Starting document processing for: {file_path}")

        # Placeholder for document processing logic
        time.sleep(2)  # Simulate processing time

        result = {
            "status": "completed",
            "file_path": file_path,
            "processed_at": time.time(),
            "content": f"Placeholder content for {file_path}",
            "worker_id": self.request.id
        }

        print(f"[WORKER] Document processing completed: {file_path}")
        return result

    except Exception as exc:
        print(f"[WORKER] Document processing failed for {file_path}: {str(exc)}")
        raise self.retry(exc=exc, countdown=60, max_retries=3)


if __name__ == "__main__":
    app.start()
    celery_app.start()