# MyObsidianAI/obsidian_rag/cli.py

"""
CLI module for Obsidian RAG Backend.

Provides command-line interface for indexing and searching the Obsidian vault.
"""
import os
from pathlib import Path
from typing import Optional

import typer
from rich.console import Console
from rich.markup import escape
from rich.panel import Panel
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn

from indexer import index_vault
from rag_chain import RAGChain
from searcher import search_vault, SearchResult

# Typer application object; the functions decorated with @app.command() below
# are registered on it as sub-commands. Shell completion installation is disabled.
app = typer.Typer(
  name="obsidian-rag",
  help="Local semantic search backend for Obsidian markdown files",
  add_completion=False,
)
# Shared Rich console used for all terminal output in this module.
console = Console()

# Default ChromaDB path: used by every command when --chroma-path is not given.
DEFAULT_CHROMA_PATH = Path.home() / ".obsidian_rag" / "chroma_db"


def _truncate_path(path: str, max_len: int = 60) -> str:
  """Return a truncated version of the file path if too long."""
  if len(path) <= max_len:
    return path
  return "..." + path[-(max_len - 3):]


@app.command()
def index(
    vault_path: str = typer.Argument(
      ...,
      help="Path to the Obsidian vault directory",
    ),
    chroma_path: Optional[str] = typer.Option(
      None,
      "--chroma-path",
      "-c",
      help=f"Path to ChromaDB storage (default: {DEFAULT_CHROMA_PATH})",
    ),
    collection_name: str = typer.Option(
      "obsidian_vault",
      "--collection",
      help="Name of the ChromaDB collection",
    ),
    max_chunk_tokens: int = typer.Option(
      200,
      "--max-tokens",
      help="Maximum tokens per chunk",
    ),
    overlap_tokens: int = typer.Option(
      30,
      "--overlap",
      help="Number of overlapping tokens between chunks",
    ),
):
  """
  Index all markdown files from the Obsidian vault into ChromaDB.
  """
  # NOTE: the docstring above doubles as the command's --help text, so
  # implementation notes are kept in comments.
  #
  # Flow: validate the vault path, make sure the ChromaDB directory exists,
  # count the markdown files for the progress bar, run index_vault() while
  # reporting progress, then print a summary of the returned statistics.
  # Exits with code 1 on an invalid vault path or an indexing failure, and
  # with code 0 when the vault contains no markdown files.
  vault_path_obj = Path(vault_path)
  chroma_path_obj = Path(chroma_path) if chroma_path else DEFAULT_CHROMA_PATH

  # User-supplied text is escaped before being interpolated into Rich markup
  # so that square brackets in paths are printed literally.
  if not vault_path_obj.exists():
    console.print(f"[red]✗ Error:[/red] Vault path does not exist: {escape(vault_path)}")
    raise typer.Exit(code=1)

  if not vault_path_obj.is_dir():
    console.print(f"[red]✗ Error:[/red] Vault path is not a directory: {escape(vault_path)}")
    raise typer.Exit(code=1)

  chroma_path_obj.mkdir(parents=True, exist_ok=True)

  # Only the count is needed here; it sizes the progress bar.
  md_files = list(vault_path_obj.rglob("*.md"))
  total_files = len(md_files)

  if total_files == 0:
    console.print(f"[yellow]⚠ Warning:[/yellow] No markdown files found in {escape(vault_path)}")
    raise typer.Exit(code=0)

  console.print(f"\n[cyan]Found {total_files} markdown files to index[/cyan]\n")

  # One single stable progress bar. The "current file" message is rendered as
  # an extra column fed from a task field instead of a nested console.status():
  # Progress and Status are both live displays, and Rich versions that allow
  # only one live display per console raise LiveError when a second is started.
  with Progress(
      SpinnerColumn(),
      TextColumn("[progress.description]{task.description}"),
      BarColumn(),
      TaskProgressColumn(),
      TextColumn("[dim]{task.fields[status]}"),
      console=console,
  ) as progress:

    main_task = progress.add_task(
      "[cyan]Indexing vault...",
      total=total_files,
      status="Preparing first file...",
    )

    def progress_callback(current_file: str, files_processed: int, total: int):
      """Update the progress bar and the current-file message."""
      short_file = escape(_truncate_path(current_file))
      progress.update(
        main_task,
        completed=files_processed,
        status=f"Processing: {short_file}",
      )

    try:
      stats = index_vault(
        vault_path=str(vault_path_obj),
        chroma_db_path=str(chroma_path_obj),
        collection_name=collection_name,
        max_chunk_tokens=max_chunk_tokens,
        overlap_tokens=overlap_tokens,
        progress_callback=progress_callback,
      )
    except Exception as e:
      # Top-level CLI boundary: report any indexing failure and exit non-zero.
      console.print(f"\n[red]✗ Error during indexing:[/red] {escape(str(e))}")
      raise typer.Exit(code=1) from e

    # Force the bar to 100% even if the callback's last report was lower.
    progress.update(main_task, completed=total_files, status="[green]✓ Completed")

  console.print()
  _display_index_results(stats)


@app.command()
def search(
    query: str = typer.Argument(
      ...,
      help="Search query",
    ),
    chroma_path: Optional[str] = typer.Option(
      None,
      "--chroma-path",
      "-c",
      help=f"Path to ChromaDB storage (default: {DEFAULT_CHROMA_PATH})",
    ),
    collection_name: str = typer.Option(
      "obsidian_vault",
      "--collection",
      help="Name of the ChromaDB collection",
    ),
    limit: int = typer.Option(
      5,
      "--limit",
      "-l",
      help="Maximum number of results to return",
    ),
    min_score: float = typer.Option(
      0.0,
      "--min-score",
      "-s",
      help="Minimum similarity score (0.0 to 1.0)",
    ),
    format: str = typer.Option(
      "compact",
      "--format",
      "-f",
      help="Output format: compact (default), panel, table",
    ),
):
  """
  Search the indexed vault for semantically similar content.

  Returns relevant sections from your Obsidian notes based on
  semantic similarity to the query.
  """
  # NOTE: the docstring above doubles as the command's --help text, so
  # implementation notes are kept in comments.
  #
  # Exits with code 1 when the ChromaDB directory is missing, the format is
  # unknown, or the search raises; exits with code 0 when nothing matches.

  # Resolve paths
  chroma_path_obj = Path(chroma_path) if chroma_path else DEFAULT_CHROMA_PATH

  # Validate chroma path exists
  if not chroma_path_obj.exists():
    console.print(
      f"[red]✗ Error:[/red] ChromaDB not found at {escape(str(chroma_path_obj))}\n"
      f"Please run 'obsidian-rag index <vault_path>' first to create the index."
    )
    raise typer.Exit(code=1)

  # Validate format
  valid_formats = ["compact", "panel", "table"]
  if format not in valid_formats:
    console.print(
      f"[red]✗ Error:[/red] Invalid format '{escape(format)}'. Valid options: {', '.join(valid_formats)}"
    )
    raise typer.Exit(code=1)

  # Perform search
  try:
    with console.status("[cyan]Searching...", spinner="dots"):
      results = search_vault(
        query=query,
        chroma_db_path=str(chroma_path_obj),
        collection_name=collection_name,
        limit=limit,
        min_score=min_score,
      )
  except ValueError as e:
    console.print(f"[red]✗ Error:[/red] {escape(str(e))}")
    raise typer.Exit(code=1) from e
  except Exception as e:
    # Top-level CLI boundary: report anything unexpected and exit non-zero.
    console.print(f"[red]✗ Unexpected error:[/red] {escape(str(e))}")
    raise typer.Exit(code=1) from e

  # The query is free text and may contain square brackets (for example an
  # Obsidian "[[wikilink]]"). Escape it so Rich prints it literally instead of
  # parsing it as markup, which can drop text or raise MarkupError.
  safe_query = escape(query)

  # Display results
  if not results:
    console.print(f"\n[yellow]No results found for query:[/yellow] '{safe_query}'")
    if min_score > 0:
      console.print(f"[dim]Try lowering --min-score (currently {min_score})[/dim]")
    raise typer.Exit(code=0)

  console.print(f"\n[cyan]Found {len(results)} result(s) for:[/cyan] '{safe_query}'\n")

  # Display with selected format
  if format == "compact":
    _display_results_compact(results)
  elif format == "panel":
    _display_results_panel(results)
  elif format == "table":
    _display_results_table(results)


@app.command()
def ask(
    query: str = typer.Argument(
      ...,
      help="Question to ask the LLM based on your Obsidian notes."
    ),
    chroma_path: Optional[str] = typer.Option(
      None,
      "--chroma-path",
      "-c",
      help=f"Path to ChromaDB storage (default: {DEFAULT_CHROMA_PATH})",
    ),
    collection_name: str = typer.Option(
      "obsidian_vault",
      "--collection",
      help="Name of the ChromaDB collection",
    ),
    top_k: int = typer.Option(
      5,
      "--top-k",
      "-k",
      help="Number of top chunks to use for context",
    ),
    min_score: float = typer.Option(
      0.0,
      "--min-score",
      "-s",
      help="Minimum similarity score for chunks",
    ),
    api_key: Optional[str] = typer.Option(
      None,
      "--api-key",
      help="Clovis API key (or set CLOVIS_API_KEY environment variable)",
    ),
    base_url: Optional[str] = typer.Option(
      None,
      "--base-url",
      help="Clovis base URL (or set CLOVIS_BASE_URL environment variable)",
    ),
):
  """
  Ask a question to the LLM using RAG over your Obsidian vault.
  """
  # NOTE: the docstring above doubles as the command's --help text, so
  # implementation notes are kept in comments.
  #
  # Exits with code 1 when the ChromaDB directory is missing, when the API key
  # or base URL cannot be resolved, or when building/querying the chain raises.

  # Resolve ChromaDB path
  chroma_path_obj = Path(chroma_path) if chroma_path else DEFAULT_CHROMA_PATH
  if not chroma_path_obj.exists():
    console.print(
      f"[red]✗ Error:[/red] ChromaDB not found at {escape(str(chroma_path_obj))}\n"
      f"Please run 'obsidian-rag index <vault_path>' first to create the index."
    )
    raise typer.Exit(code=1)

  # Resolve API key and base URL: command-line options win over the environment.
  api_key = api_key or os.getenv("CLOVIS_API_KEY")
  base_url = base_url or os.getenv("CLOVIS_BASE_URL")
  if not api_key or not base_url:
    console.print(
      "[red]✗ Error:[/red] API key or base URL not provided.\n"
      "Set them via --api-key / --base-url or environment variables CLOVIS_API_KEY and CLOVIS_BASE_URL."
    )
    raise typer.Exit(code=1)

  try:
    # Instantiate RAGChain inside the try block so that a failure while
    # setting it up is reported the same way as a failure while querying.
    rag = RAGChain(
      chroma_db_path=str(chroma_path_obj),
      collection_name=collection_name,
      top_k=top_k,
      min_score=min_score,
      api_key=api_key,
      base_url=base_url,
    )

    # Get answer from RAG
    with console.status("[cyan]Querying LLM...", spinner="dots"):
      answer, used_chunks = rag.answer_query(query)
  except Exception as e:
    # Top-level CLI boundary: report the failure and exit non-zero.
    console.print(f"[red]✗ Error:[/red] {escape(str(e))}")
    raise typer.Exit(code=1) from e

  # Display answer. The text is printed with markup disabled so that square
  # brackets in it are shown literally instead of being parsed as Rich tags.
  console.print("\n[bold green]Answer:[/bold green]\n")
  console.print(answer + "\n", markup=False)

  # Display sources used
  if used_chunks:
    sources = ", ".join(f"{c.file_path}#L{c.line_start}-L{c.line_end}" for c in used_chunks)
    console.print(f"[bold cyan]Sources:[/bold cyan] {escape(sources)}\n")
  else:
    console.print("[bold cyan]Sources:[/bold cyan] None\n")


def _display_index_results(stats: dict):
  """
  Display indexing results with rich formatting.

  Prints a summary panel and, when the statistics contain errors, a list of
  the files that were skipped together with their error messages.

  Args:
      stats: Statistics dictionary from index_vault. The keys read here are
          "files_processed", "chunks_created", "collection_name" and "errors";
          each entry of "errors" is indexed with "file" and "error".

  Raises:
      KeyError: If one of the keys listed above is missing.
  """
  files_processed = stats["files_processed"]
  chunks_created = stats["chunks_created"]
  errors = stats["errors"]
  # Escaped because the collection name is user-supplied and is interpolated
  # into Rich markup below.
  collection = escape(str(stats["collection_name"]))

  # Success summary
  console.print(Panel(
    f"[green]✓[/green] Indexing completed\n\n"
    f"Files processed: [cyan]{files_processed}[/cyan]\n"
    f"Chunks created: [cyan]{chunks_created}[/cyan]\n"
    f"Collection: [cyan]{collection}[/cyan]",
    title="[bold]Indexing Results[/bold]",
    border_style="green",
  ))

  # Display errors if any
  if errors:
    console.print(f"\n[yellow]⚠ {len(errors)} file(s) skipped due to errors:[/yellow]\n")
    for error in errors:
      # File paths and exception messages may contain square brackets; escape
      # them so Rich prints them literally instead of parsing them as tags.
      file_label = escape(str(error["file"]))
      message = escape(str(error["error"]))
      console.print(f"  [red]•[/red] {file_label}: [dim]{message}[/dim]")


def _display_results_compact(results: list[SearchResult]):
  """
  Display search results in compact format.

  For every result this prints a numbered header line (file path, score and a
  star rating), a line with the section title and line range, and the first
  200 characters of the matched text.

  Args:
      results: List of SearchResult objects. The attributes read here are
          file_path, score, section_title, line_start, line_end and text.
  """
  for idx, result in enumerate(results, 1):
    # Format score as stars (0-5 scale). The count is capped at 5 so a score
    # above 1.0 cannot overflow the scale; a negative count yields no stars.
    stars = "⭐" * min(5, int(result.score * 5))

    # Note content routinely contains square brackets ("[[wikilinks]]",
    # "- [x]" checkboxes, markdown links). Everything taken from the result is
    # escaped so Rich prints it literally instead of parsing it as markup,
    # which could drop text or raise MarkupError.
    file_path = escape(str(result.file_path))
    section_title = escape(str(result.section_title))

    console.print(f"[bold cyan]{idx}.[/bold cyan] {file_path} [dim](score: {result.score:.2f} {stars})[/dim]")
    console.print(
      f"   Section: [yellow]{section_title}[/yellow] | Lines: [dim]{result.line_start}-{result.line_end}[/dim]")

    # Truncate text if too long. Truncation happens before escaping so the
    # cut can never land in the middle of an escape sequence.
    text = result.text
    if len(text) > 200:
      text = text[:200] + "..."

    console.print(f"   {escape(text)}\n")


def _display_results_panel(results: list[SearchResult]):
  """
  Render search results for the "panel" output format.

  The panel layout is still a placeholder: a notice is printed and the
  results are then rendered with the compact layout.

  Args:
      results: SearchResult objects to display
  """
  # TODO: Implement panel format in future
  notice = "[yellow]Panel format not yet implemented. Using compact format.[/yellow]\n"
  console.print(notice)
  _display_results_compact(results)


def _display_results_table(results: list[SearchResult]):
  """
  Render search results for the "table" output format.

  The table layout is still a placeholder: a notice is printed and the
  results are then rendered with the compact layout.

  Args:
      results: SearchResult objects to display
  """
  # TODO: Implement table format in future
  notice = "[yellow]Table format not yet implemented. Using compact format.[/yellow]\n"
  console.print(notice)
  _display_results_compact(results)


def main() -> None:
  """Run the Typer application; this is the CLI entry point."""
  app()


# Run the CLI only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
  main()