from difflib import SequenceMatcher

from app.models.document import FileDocument


def _is_subsequence(query: str, target: str) -> tuple[bool, float]:
  """
  Check whether query is a subsequence of target (case-insensitive).

  Returns (match, score).
  - match is True when every character of query appears in target in the
    same order (not necessarily adjacent).
  - score is len(query) divided by the size of the smallest window of target
    that contains query as a subsequence, so it lies in (0, 1] for a
    non-empty match and equals 1.0 when the characters are contiguous.

  An empty query is a subsequence of every target; it is reported as a match
  with score 0.0 because there is no window to measure.
  A failed match returns (False, 0.0).
  """
  query = query.lower()
  target = target.lower()

  # Nothing to locate: avoid indexing into an empty query below.
  if not query:
    return True, 0.0

  first, rest = query[0], query[1:]
  best_window = None

  # Try every occurrence of the first query character as the window start.
  # For a fixed start, taking the leftmost occurrence of each following
  # character yields the earliest possible end, i.e. the tightest window
  # for that start. The minimum over all starts is the smallest window.
  start = target.find(first)
  while start != -1:
    end = start
    for char in rest:
      end = target.find(char, end + 1)
      if end == -1:
        break

    if end == -1:
      # The remainder did not fit after this start; a later start only has
      # less of the target available, so it cannot succeed either.
      break

    window = end - start + 1
    if best_window is None or window < best_window:
      best_window = window

    # A window as long as the query is contiguous and cannot be improved.
    if best_window == len(query):
      break

    start = target.find(first, start + 1)

  if best_window is None:
    return False, 0.0

  # Score: ratio of query length vs window size (compactness)
  return True, len(query) / best_window

def fuzzy_matching(filename: str, documents: list[FileDocument], similarity_threshold: float = 0.7) -> list[FileDocument]:
  """
  Return the documents whose filename is similar to the search term.

  Similarity is difflib's SequenceMatcher ratio between the lower-cased
  search term and the lower-cased document filename.

  Args:
    filename: Search term to compare against each document's filename.
    documents: Candidates; each must expose a `filename` string attribute.
    similarity_threshold: Minimum ratio (inclusive) for a document to be kept.

  Returns:
    The matching documents ordered by similarity, highest first. Documents
    with equal similarity keep their relative order from `documents`.
  """
  # Lower-case the search term once instead of once per document.
  needle = filename.lower()

  matches = []
  for file_doc in documents:
    matcher = SequenceMatcher(None, needle, file_doc.filename.lower())

    # real_quick_ratio() and quick_ratio() are documented upper bounds on
    # ratio() and are much cheaper to compute. If either is already below the
    # threshold the full ratio cannot reach it, so skip the expensive call.
    if matcher.real_quick_ratio() < similarity_threshold:
      continue
    if matcher.quick_ratio() < similarity_threshold:
      continue

    similarity = matcher.ratio()
    if similarity >= similarity_threshold:
      matches.append((file_doc, similarity))

  # Sort by similarity score (highest first); the sort is stable.
  matches.sort(key=lambda pair: pair[1], reverse=True)

  # Return only the FileDocument objects
  return [file_doc for file_doc, _ in matches]
  

def subsequence_matching(query: str, documents: list[FileDocument]):
  """
  Return the documents whose filename contains query as a subsequence.

  Each document's filename is checked with _is_subsequence; documents that do
  not match are dropped. The remaining documents are returned ordered by the
  score reported by _is_subsequence, highest first. Documents with equal
  scores keep their relative order from `documents`.
  """
  scored = []
  for candidate in documents:
    is_match, compactness = _is_subsequence(query, candidate.filename)
    if not is_match:
      continue
    scored.append((compactness, candidate))

  # Highest score first; sorted() is stable, so ties keep input order.
  ranked = sorted(scored, key=lambda entry: entry[0], reverse=True)

  # Strip the scores and hand back just the documents.
  return [candidate for _, candidate in ranked]