diff --git a/src/file-processor/app/database/repositories/document_repository.py b/src/file-processor/app/database/repositories/document_repository.py
index 7cc468b..67754e4 100644
--- a/src/file-processor/app/database/repositories/document_repository.py
+++ b/src/file-processor/app/database/repositories/document_repository.py
@@ -12,6 +12,20 @@ from difflib import SequenceMatcher
 from motor.motor_asyncio import AsyncIOMotorCollection
 from app.models.document import FileDocument
 from app.database.connection import get_database
+from app.utils.ducment_matching import fuzzy_matching, subsequence_matching
+
+
+class MatchMethodBase:
+  """Marker base class for the filename matching strategies accepted by find_document_by_name."""
+
+
+class SubsequenceMatching(MatchMethodBase):
+  """Selects subsequence matching, which find_document_by_name also uses when no method is given."""
+
+
+class FuzzyMatching(MatchMethodBase):
+  def __init__(self, threshold: float = 0.6):
+    self.threshold = threshold  # minimum SequenceMatcher ratio (0.0 to 1.0) a filename must reach
 
 
 class FileDocumentRepository:
@@ -28,6 +42,14 @@ class FileDocumentRepository:
     self.collection: AsyncIOMotorCollection = self.db.files
     self._ensure_indexes()
   
+  async def initialize(self):
+    """
+    Initialize repository by ensuring required indexes exist.
+
+    Should be called after repository instantiation to set up database indexes.
+    """
+    await self._ensure_indexes()
+  
   async def _ensure_indexes(self):
     """
     Ensure required database indexes exist.
@@ -64,7 +86,7 @@ class FileDocumentRepository:
       return file_data
     
     except DuplicateKeyError as e:
-      raise DuplicateKeyError(f"File with same hash already exists: {e}")
+      raise DuplicateKeyError(f"File with same file path already exists: {e}")
     except PyMongoError as e:
       raise ValueError(f"Failed to create file document: {e}")
   
@@ -128,13 +150,13 @@ class FileDocumentRepository:
     except PyMongoError:
       return None
   
-  async def find_document_by_name(self, filename: str, similarity_threshold: float = 0.6) -> List[FileDocument]:
+  async def find_document_by_name(self, filename: str, matching_method: MatchMethodBase = None) -> List[FileDocument]:
     """
     Find file documents by filename using fuzzy matching.
     
     Args:
         filename (str): Filename to search for
-        similarity_threshold (float): Minimum similarity ratio (0.0 to 1.0)
+        matching_method (MatchMethodBase): Matching strategy. FuzzyMatching applies its threshold; any other value, including None, uses subsequence matching
         
     Returns:
         List[FileDocument]: List of matching files sorted by similarity score
@@ -143,21 +165,12 @@ class FileDocumentRepository:
       # Get all files from database
       cursor = self.collection.find({})
       all_files = await cursor.to_list(length=None)
+      all_documents = [FileDocument(**file_doc) for file_doc in all_files]
       
-      matches = []
-      for file_doc in all_files:
-        file_obj = FileDocument(**file_doc)
-        # Calculate similarity between search term and filename
-        similarity = SequenceMatcher(None, filename.lower(), file_obj.filename.lower()).ratio()
-        
-        if similarity >= similarity_threshold:
-          matches.append((file_obj, similarity))
+      if isinstance(matching_method, FuzzyMatching):
+        return fuzzy_matching(filename, all_documents, matching_method.threshold)
       
-      # Sort by similarity score (highest first)
-      matches.sort(key=lambda x: x[1], reverse=True)
-      
-      # Return only the FileDocument objects
-      return [match[0] for match in matches]
+      return subsequence_matching(filename, all_documents)
     
     except PyMongoError:
       return []
diff --git a/src/file-processor/app/database/repositories/user_repository.py b/src/file-processor/app/database/repositories/user_repository.py
index 54a5c87..c3b29e3 100644
--- a/src/file-processor/app/database/repositories/user_repository.py
+++ b/src/file-processor/app/database/repositories/user_repository.py
@@ -34,6 +34,14 @@ class UserRepository:
     self.collection: AsyncIOMotorCollection = database.users
     self._ensure_indexes()
   
+  async def initialize(self):
+    """
+    Initialize repository by ensuring required indexes exist.
+
+    Should be called after repository instantiation to set up database indexes.
+    """
+    await self._ensure_indexes()
+  
   async def _ensure_indexes(self):
     """
     Ensure required database indexes exist.
diff --git a/src/file-processor/app/utils/ducment_matching.py b/src/file-processor/app/utils/ducment_matching.py
new file mode 100644
index 0000000..2ea24cd
--- /dev/null
+++ b/src/file-processor/app/utils/ducment_matching.py
@@ -0,0 +1,60 @@
+from difflib import SequenceMatcher
+
+from app.models.document import FileDocument
+
+
+def _is_subsequence(query: str, target: str) -> tuple[bool, float]:
+  """
+  Check if query is a subsequence of target (case-insensitive).
+
+  Returns (match, score); score is in (0, 1] and grows as the matched letters
+  sit closer together. An empty query matches nothing: (False, 0.0).
+  """
+  query = query.lower()
+  target = target.lower()
+  if not query:
+    # No characters to locate; the window below would index an empty list.
+    return False, 0.0
+
+  positions = []
+  idx = 0
+  for char in query:
+    idx = target.find(char, idx)
+    if idx == -1:
+      return False, 0.0
+    positions.append(idx)
+    idx += 1
+
+  # Span of the greedy leftmost match (not necessarily the tightest window)
+  window_size = positions[-1] - positions[0] + 1
+  # Score: ratio of query length vs window size (compactness)
+  return True, len(query) / window_size
+
+def fuzzy_matching(filename: str, documents: list[FileDocument], similarity_threshold: float = 0.7):
+  """
+  Return the documents whose filename resembles `filename`, best match first.
+
+  Similarity is the case-insensitive SequenceMatcher ratio; documents scoring
+  below `similarity_threshold` are dropped.
+  """
+  scored = []
+  for candidate in documents:
+    ratio = SequenceMatcher(None, filename.lower(), candidate.filename.lower()).ratio()
+    if ratio >= similarity_threshold:
+      scored.append((candidate, ratio))
+  ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)
+  return [candidate for candidate, _ in ranked]
+  
+
+def subsequence_matching(query: str, documents: list[FileDocument]):
+  """
+  Return the documents whose filename contains `query` as an ordered,
+  case-insensitive subsequence, most compact match first.
+  """
+  scored = []
+  for candidate in documents:
+    found, compactness = _is_subsequence(query, candidate.filename)
+    if found:
+      scored.append((candidate, compactness))
+  ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)
+  return [candidate for candidate, _ in ranked]
diff --git a/tests/test_document_repository.py b/tests/test_document_repository.py
index 4ef5d85..e61180d 100644
--- a/tests/test_document_repository.py
+++ b/tests/test_document_repository.py
@@ -26,6 +26,7 @@ async def in_memory_repository():
   repo = FileDocumentRepository()
   repo.db = db
   repo.collection = db.files
+  await repo.initialize()
   return repo
 
 
@@ -87,6 +88,7 @@ class TestFileDocumentRepositoryInitialization:
     """Test repository initialization."""
     # Arrange
     repo = FileDocumentRepository()
+    await repo.initialize()
     
     # Act & Assert (should not raise any exception)
     assert repo.db is not None
@@ -276,48 +278,6 @@ class TestFileDocumentRepositoryFuzzySearch:
     assert "document1.pdf" in filenames
     assert "similar_document.pdf" in filenames
   
-  @pytest.mark.asyncio
-  async def test_i_can_find_documents_with_custom_threshold(self, in_memory_repository, multiple_sample_documents):
-    """Test finding documents with custom similarity threshold."""
-    # Arrange
-    for doc in multiple_sample_documents:
-      await in_memory_repository.create_document(doc)
-    
-    # Act - Very high threshold should only match exact or very similar names
-    found_docs = await in_memory_repository.find_document_by_name("document1.pdf", similarity_threshold=0.9)
-    
-    # Assert
-    assert len(found_docs) == 1
-    assert found_docs[0].filename == "document1.pdf"
-  
-  @pytest.mark.asyncio
-  async def test_i_can_find_documents_sorted_by_similarity(self, in_memory_repository, multiple_sample_documents):
-    """Test that documents are sorted by similarity score (highest first)."""
-    # Arrange
-    for doc in multiple_sample_documents:
-      await in_memory_repository.create_document(doc)
-    
-    # Act
-    found_docs = await in_memory_repository.find_document_by_name("document1", similarity_threshold=0.3)
-    
-    # Assert
-    assert len(found_docs) >= 1
-    # First result should be the most similar (document1.pdf)
-    assert found_docs[0].filename == "document1.pdf"
-  
-  @pytest.mark.asyncio
-  async def test_i_cannot_find_documents_below_threshold(self, in_memory_repository, multiple_sample_documents):
-    """Test that no documents are returned when similarity is below threshold."""
-    # Arrange
-    for doc in multiple_sample_documents:
-      await in_memory_repository.create_document(doc)
-    
-    # Act
-    found_docs = await in_memory_repository.find_document_by_name("xyz", similarity_threshold=0.6)
-    
-    # Assert
-    assert len(found_docs) == 0
-  
   @pytest.mark.asyncio
   async def test_i_cannot_find_documents_by_name_with_pymongo_error(self, in_memory_repository, mocker):
     """Test handling of PyMongo errors during name search."""
@@ -377,11 +337,13 @@ class TestFileDocumentRepositoryListing:
     # Create documents with different timestamps
     doc1 = sample_file_document.model_copy()
     doc1.filename = "oldest.pdf"
+    doc1.filepath = f"/path/to/{doc1.filename}"
     doc1.file_hash = "hash1" + "0" * 58
     doc1.detected_at = datetime.now() - timedelta(hours=2)
     
     doc2 = sample_file_document.model_copy()
     doc2.filename = "newest.pdf"
+    doc2.filepath = f"/path/to/{doc2.filename}"
     doc2.file_hash = "hash2" + "0" * 58
     doc2.detected_at = datetime.now()
     
@@ -433,7 +395,6 @@ class TestFileDocumentRepositoryUpdate:
     
     # Assert
     assert updated_doc is not None
-    assert updated_doc.tags == sample_update_data["tags"]
     assert updated_doc.file_type == sample_update_data["file_type"]
     assert updated_doc.id == created_doc.id
     assert updated_doc.filename == created_doc.filename  # Unchanged fields remain
@@ -443,30 +404,30 @@ class TestFileDocumentRepositoryUpdate:
     """Test updating document with partial data."""
     # Arrange
     created_doc = await in_memory_repository.create_document(sample_file_document)
-    partial_update = {"tags": ["new_tag"]}
+    partial_update = {"file_type": FileType("txt")}
     
     # Act
     updated_doc = await in_memory_repository.update_document(str(created_doc.id), partial_update)
     
     # Assert
     assert updated_doc is not None
-    assert updated_doc.tags == ["new_tag"]
+    assert updated_doc.file_type == FileType("txt")
     assert updated_doc.filename == created_doc.filename  # Should remain unchanged
-    assert updated_doc.file_type == created_doc.file_type  # Should remain unchanged
+    assert updated_doc.filepath == created_doc.filepath  # Should remain unchanged
   
   @pytest.mark.asyncio
   async def test_i_can_update_document_filtering_none_values(self, in_memory_repository, sample_file_document):
     """Test that None values are filtered out from update data."""
     # Arrange
     created_doc = await in_memory_repository.create_document(sample_file_document)
-    update_with_none = {"tags": ["new_tag"], "file_type": None}
+    update_with_none = {"metadata": {"tags": ["updated", "document"]}, "file_type": None}
     
     # Act
     updated_doc = await in_memory_repository.update_document(str(created_doc.id), update_with_none)
     
     # Assert
     assert updated_doc is not None
-    assert updated_doc.tags == ["new_tag"]
+    assert updated_doc.metadata == {"tags": ["updated", "document"]}
     assert updated_doc.file_type == created_doc.file_type  # Should remain unchanged (None filtered out)
   
   @pytest.mark.asyncio
@@ -483,7 +444,7 @@ class TestFileDocumentRepositoryUpdate:
     assert result is not None
     assert result.filename == created_doc.filename
     assert result.file_hash == created_doc.file_hash
-    assert result.tags == created_doc.tags
+    assert result.metadata == created_doc.metadata
   
   @pytest.mark.asyncio
   async def test_i_cannot_update_document_with_invalid_id(self, in_memory_repository, sample_update_data):
diff --git a/tests/test_user_repository.py b/tests/test_user_repository.py
index c6548be..0d6d39b 100644
--- a/tests/test_user_repository.py
+++ b/tests/test_user_repository.py
@@ -23,7 +23,7 @@ async def in_memory_repository():
   client = AsyncMongoMockClient()
   db = client.test_database
   repo = UserRepository(db)
-  #await repo.initialize()
+  await repo.initialize()
   return repo
 
 
diff --git a/tests/test_utils_document_matching.py b/tests/test_utils_document_matching.py
new file mode 100644
index 0000000..5cdc941
--- /dev/null
+++ b/tests/test_utils_document_matching.py
@@ -0,0 +1,87 @@
+import os
+from datetime import datetime
+
+import pytest
+from app.models.document import FileDocument, FileType
+from app.utils.ducment_matching import fuzzy_matching, subsequence_matching
+
+
+def get_doc(filename: str = None):
+  """Sample FileDocument data for testing."""
+  return FileDocument(
+    filename=f"{filename}",
+    filepath=f"/path/to/{filename}",
+    file_hash="a1b2c3d4e5f6789012345678901234567890abcdef1234567890abcdef123456",
+    file_type=FileType(os.path.splitext(filename)[1].lstrip(".") or "txt"),
+    detected_at=datetime.now(),
+  )
+
+
+class TestFuzzyMatching:
+  def test_i_can_find_exact_match_with_fuzzy(self):
+    # Exact match should always pass
+    docs = [get_doc(filename="hello.txt")]
+    result = fuzzy_matching("hello.txt", docs)
+    assert len(result) == 1
+    assert result[0].filename == "hello.txt"
+  
+  def test_i_can_find_close_match_with_fuzzy(self):
+    # "helo.txt" should match "hello.txt" with high similarity
+    docs = [get_doc(filename="hello.txt")]
+    result = fuzzy_matching("helo.txt", docs, similarity_threshold=0.7)
+    assert len(result) == 1
+    assert result[0].filename == "hello.txt"
+  
+  def test_i_cannot_find_dissimilar_match_with_fuzzy(self):
+    # "world.txt" should not match "hello.txt"
+    docs = [get_doc(filename="hello.txt")]
+    result = fuzzy_matching("world.txt", docs, similarity_threshold=0.7)
+    assert len(result) == 0
+  
+  def test_i_can_sort_by_similarity_in_fuzzy(self):
+    # "helo.txt" is closer to "hello.txt" than "hllll.txt"
+    docs = [
+        get_doc(filename="hello.txt"),
+        get_doc(filename="hllll.txt"),
+    ]
+    result = fuzzy_matching("helo.txt", docs, similarity_threshold=0.5)
+    assert result[0].filename == "hello.txt"
+
+
+class TestSubsequenceMatching:
+  def test_i_can_match_subsequence_simple(self):
+    # "ifb" should match "ilFaitBeau.txt"
+    docs = [get_doc(filename="ilFaitBeau.txt")]
+    result = subsequence_matching("ifb", docs)
+    assert len(result) == 1
+    assert result[0].filename == "ilFaitBeau.txt"
+  
+  def test_i_cannot_match_wrong_order_subsequence(self):
+    # "bfi" should not match "ilFaitBeau.txt" because the order is wrong
+    docs = [get_doc(filename="ilFaitBeau.txt")]
+    result = subsequence_matching("bfi", docs)
+    assert len(result) == 0
+  
+  def test_i_can_match_multiple_documents_subsequence(self):
+    # "ifb" should match both filenames, but "ilFaitBeau.txt" has a higher score
+    docs = [
+        get_doc(filename="ilFaitBeau.txt"),
+        get_doc(filename="information_base.txt"),
+    ]
+    result = subsequence_matching("ifb", docs)
+    assert len(result) == 2
+    assert result[0].filename == "ilFaitBeau.txt"
+    assert result[1].filename == "information_base.txt"
+  
+  def test_i_cannot_match_unrelated_subsequence(self):
+    # "xyz" should not match any file
+    docs = [get_doc(filename="ilFaitBeau.txt")]
+    result = subsequence_matching("xyz", docs)
+    assert len(result) == 0
+  
+  def test_i_can_handle_case_insensitivity_in_subsequence(self):
+    # Matching should be case-insensitive
+    docs = [get_doc(filename="HelloWorld.txt")]
+    result = subsequence_matching("hw", docs)
+    assert len(result) == 1
+    assert result[0].filename == "HelloWorld.txt"