# Source file: MyObsidianAI/obsidian_rag/cli.py
# Commit: d4925f7969 "Initial commit" by Kodjo Sossouvi, 2025-12-12 11:31:44 +01:00
#
# 404 lines, 11 KiB, Python

"""
CLI module for Obsidian RAG Backend.
Provides command-line interface for indexing and searching the Obsidian vault.
"""
import os
from pathlib import Path
from typing import Optional

import typer
from rich.console import Console
from rich.markup import escape
from rich.panel import Panel
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn

from indexer import index_vault
from rag_chain import RAGChain
from searcher import search_vault, SearchResult
# Typer application object; the @app.command() functions below register on it.
app = typer.Typer(
    name="obsidian-rag",
    help="Local semantic search backend for Obsidian markdown files",
    add_completion=False,
)
# Single rich console shared by the commands and the display helpers.
console = Console()
# ChromaDB location used whenever --chroma-path is not given.
DEFAULT_CHROMA_PATH = Path.home().joinpath(".obsidian_rag", "chroma_db")
def _truncate_path(path: str, max_len: int = 60) -> str:
    """Shorten a file path for display, keeping its tail.

    Args:
        path: Path text to shorten.
        max_len: Maximum length of the returned string.

    Returns:
        ``path`` unchanged when it already fits; otherwise ``"..."`` followed
        by the last ``max_len - 3`` characters of ``path``. The result never
        exceeds ``max_len`` characters (an empty string for ``max_len <= 0``).
    """
    if len(path) <= max_len:
        return path
    ellipsis = "..."
    keep = max_len - len(ellipsis)
    if keep <= 0:
        # No room for any path characters. A slice such as path[-0:] would
        # return the whole path, so emit only as much ellipsis as fits.
        return ellipsis[:max(max_len, 0)]
    return ellipsis + path[-keep:]
@app.command()
def index(
    vault_path: str = typer.Argument(
        ...,
        help="Path to the Obsidian vault directory",
    ),
    chroma_path: Optional[str] = typer.Option(
        None,
        "--chroma-path",
        "-c",
        help=f"Path to ChromaDB storage (default: {DEFAULT_CHROMA_PATH})",
    ),
    collection_name: str = typer.Option(
        "obsidian_vault",
        "--collection",
        help="Name of the ChromaDB collection",
    ),
    max_chunk_tokens: int = typer.Option(
        200,
        "--max-tokens",
        help="Maximum tokens per chunk",
    ),
    overlap_tokens: int = typer.Option(
        30,
        "--overlap",
        help="Number of overlapping tokens between chunks",
    ),
):
    """
    Index all markdown files from the Obsidian vault into ChromaDB.
    """
    # NOTE: the docstring above is also the command's --help text, so
    # implementation notes are kept in comments.
    #
    # Flow: validate the vault path, make sure the ChromaDB directory exists,
    # count the markdown files to size the progress bar, run index_vault()
    # with a progress callback, then print a summary of the returned stats.
    # Exits with code 1 on an invalid vault path or an indexing failure and
    # with code 0 when the vault contains no markdown files.
    vault_path_obj = Path(vault_path)
    chroma_path_obj = Path(chroma_path) if chroma_path else DEFAULT_CHROMA_PATH

    # User-supplied text is escaped before being mixed into rich markup so
    # that square brackets in a path are printed literally.
    if not vault_path_obj.exists():
        console.print(f"[red]✗ Error:[/red] Vault path does not exist: {escape(vault_path)}")
        raise typer.Exit(code=1)
    if not vault_path_obj.is_dir():
        console.print(f"[red]✗ Error:[/red] Vault path is not a directory: {escape(vault_path)}")
        raise typer.Exit(code=1)

    chroma_path_obj.mkdir(parents=True, exist_ok=True)

    # Only the count is needed here; index_vault() does its own file walk.
    total_files = sum(1 for _ in vault_path_obj.rglob("*.md"))
    if total_files == 0:
        console.print(f"[yellow]⚠ Warning:[/yellow] No markdown files found in {escape(vault_path)}")
        raise typer.Exit(code=0)

    console.print(f"\n[cyan]Found {total_files} markdown files to index[/cyan]\n")

    # A single live display: the per-file status is a column of the progress
    # bar rather than a separate console.status(). Starting a second live
    # display while Progress is active raises LiveError on Rich releases
    # that do not support nested live displays.
    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TaskProgressColumn(),
        TextColumn("{task.fields[status]}"),
        console=console,
    ) as progress:
        main_task = progress.add_task(
            "[cyan]Indexing vault...",
            total=total_files,
            status="[dim]Preparing first file...",
        )

        def progress_callback(current_file: str, files_processed: int, total: int):
            """Update progress bar and status message."""
            short_file = escape(_truncate_path(current_file))
            progress.update(
                main_task,
                completed=files_processed,
                status=f"[dim]Processing: {short_file}",
            )

        try:
            stats = index_vault(
                vault_path=str(vault_path_obj),
                chroma_db_path=str(chroma_path_obj),
                collection_name=collection_name,
                max_chunk_tokens=max_chunk_tokens,
                overlap_tokens=overlap_tokens,
                progress_callback=progress_callback,
            )
        except Exception as e:
            # Top-level CLI boundary: report the failure and exit non-zero.
            console.print(f"\n[red]✗ Error during indexing:[/red] {escape(str(e))}")
            raise typer.Exit(code=1) from e

        # Force the bar to 100% in case the callback did not report the last file.
        progress.update(main_task, completed=total_files, status="[green]✓ Completed")

    console.print()
    _display_index_results(stats)
@app.command()
def search(
    query: str = typer.Argument(
        ...,
        help="Search query",
    ),
    chroma_path: Optional[str] = typer.Option(
        None,
        "--chroma-path",
        "-c",
        help=f"Path to ChromaDB storage (default: {DEFAULT_CHROMA_PATH})",
    ),
    collection_name: str = typer.Option(
        "obsidian_vault",
        "--collection",
        help="Name of the ChromaDB collection",
    ),
    limit: int = typer.Option(
        5,
        "--limit",
        "-l",
        help="Maximum number of results to return",
    ),
    min_score: float = typer.Option(
        0.0,
        "--min-score",
        "-s",
        help="Minimum similarity score (0.0 to 1.0)",
    ),
    format: str = typer.Option(
        "compact",
        "--format",
        "-f",
        help="Output format: compact (default), panel, table",
    ),
):
    """
    Search the indexed vault for semantically similar content.
    Returns relevant sections from your Obsidian notes based on
    semantic similarity to the query.
    """
    # NOTE: the docstring above is also the command's --help text, so
    # implementation notes are kept in comments.
    #
    # Exits with code 1 when the ChromaDB path is missing, the format is
    # unknown, or search_vault() raises; exits with code 0 when the search
    # returns no results.

    # Resolve paths
    chroma_path_obj = Path(chroma_path) if chroma_path else DEFAULT_CHROMA_PATH

    # Validate chroma path exists
    if not chroma_path_obj.exists():
        console.print(
            f"[red]✗ Error:[/red] ChromaDB not found at {escape(str(chroma_path_obj))}\n"
            f"Please run 'obsidian-rag index <vault_path>' first to create the index."
        )
        raise typer.Exit(code=1)

    # Validate format; the mapping doubles as the renderer dispatch table.
    renderers = {
        "compact": _display_results_compact,
        "panel": _display_results_panel,
        "table": _display_results_table,
    }
    if format not in renderers:
        console.print(
            f"[red]✗ Error:[/red] Invalid format '{escape(format)}'. "
            f"Valid options: {', '.join(renderers)}"
        )
        raise typer.Exit(code=1)

    # Perform search
    try:
        with console.status("[cyan]Searching...", spinner="dots"):
            results = search_vault(
                query=query,
                chroma_db_path=str(chroma_path_obj),
                collection_name=collection_name,
                limit=limit,
                min_score=min_score,
            )
    except ValueError as e:
        console.print(f"[red]✗ Error:[/red] {escape(str(e))}")
        raise typer.Exit(code=1) from e
    except Exception as e:
        console.print(f"[red]✗ Unexpected error:[/red] {escape(str(e))}")
        raise typer.Exit(code=1) from e

    # The query is user input: escape it so text such as "[bold]" or "[/x]"
    # is printed literally instead of being parsed as rich markup.
    shown_query = escape(query)

    # Display results
    if not results:
        console.print(f"\n[yellow]No results found for query:[/yellow] '{shown_query}'")
        if min_score > 0:
            console.print(f"[dim]Try lowering --min-score (currently {min_score})[/dim]")
        raise typer.Exit(code=0)

    console.print(f"\n[cyan]Found {len(results)} result(s) for:[/cyan] '{shown_query}'\n")

    # Display with selected format
    renderers[format](results)
@app.command()
def ask(
    query: str = typer.Argument(
        ...,
        help="Question to ask the LLM based on your Obsidian notes."
    ),
    chroma_path: Optional[str] = typer.Option(
        None,
        "--chroma-path",
        "-c",
        help=f"Path to ChromaDB storage (default: {DEFAULT_CHROMA_PATH})",
    ),
    collection_name: str = typer.Option(
        "obsidian_vault",
        "--collection",
        help="Name of the ChromaDB collection",
    ),
    top_k: int = typer.Option(
        5,
        "--top-k",
        "-k",
        help="Number of top chunks to use for context",
    ),
    min_score: float = typer.Option(
        0.0,
        "--min-score",
        "-s",
        help="Minimum similarity score for chunks",
    ),
    api_key: Optional[str] = typer.Option(
        None,
        "--api-key",
        help="Clovis API key (or set CLOVIS_API_KEY environment variable)",
    ),
    base_url: Optional[str] = typer.Option(
        None,
        "--base-url",
        help="Clovis base URL (or set CLOVIS_BASE_URL environment variable)",
    ),
):
    """
    Ask a question to the LLM using RAG over your Obsidian vault.
    """
    # NOTE: the docstring above is also the command's --help text, so
    # implementation notes are kept in comments.
    #
    # Exits with code 1 when the ChromaDB path is missing, when the API key
    # or base URL cannot be resolved, or when building/querying the RAG
    # chain raises.

    # Resolve ChromaDB path
    chroma_path_obj = Path(chroma_path) if chroma_path else DEFAULT_CHROMA_PATH
    if not chroma_path_obj.exists():
        console.print(
            f"[red]✗ Error:[/red] ChromaDB not found at {escape(str(chroma_path_obj))}\n"
            f"Please run 'obsidian-rag index <vault_path>' first to create the index."
        )
        raise typer.Exit(code=1)

    # Resolve API key and base URL: command-line options win over environment.
    api_key = api_key or os.getenv("CLOVIS_API_KEY")
    base_url = base_url or os.getenv("CLOVIS_BASE_URL")
    if not api_key or not base_url:
        console.print(
            "[red]✗ Error:[/red] API key or base URL not provided.\n"
            "Set them via --api-key / --base-url or environment variables CLOVIS_API_KEY and CLOVIS_BASE_URL."
        )
        raise typer.Exit(code=1)

    # Build the chain and query it inside one try block so that a failure in
    # either step is reported as a clean error instead of a traceback.
    try:
        rag = RAGChain(
            chroma_db_path=str(chroma_path_obj),
            collection_name=collection_name,
            top_k=top_k,
            min_score=min_score,
            api_key=api_key,
            base_url=base_url,
        )
        with console.status("[cyan]Querying LLM...", spinner="dots"):
            answer, used_chunks = rag.answer_query(query)
    except Exception as e:
        console.print(f"[red]✗ Error:[/red] {escape(str(e))}")
        raise typer.Exit(code=1) from e

    # Display answer. Markup parsing is disabled because the answer is
    # free-form text: bracketed spans like "[text]" would otherwise be
    # consumed as rich style tags.
    console.print("\n[bold green]Answer:[/bold green]\n")
    console.print(answer + "\n", markup=False)

    # Display sources used
    if used_chunks:
        sources = ", ".join(
            escape(f"{c.file_path}#L{c.line_start}-L{c.line_end}") for c in used_chunks
        )
        console.print(f"[bold cyan]Sources:[/bold cyan] {sources}\n")
    else:
        console.print("[bold cyan]Sources:[/bold cyan] None\n")
def _display_index_results(stats: dict):
    """
    Display indexing results with rich formatting.

    Prints a summary panel and, when the run recorded errors, a list of the
    files that were skipped.

    Args:
        stats: Statistics dictionary from index_vault. The keys read here are
            "files_processed", "chunks_created", "collection_name" and
            "errors"; each entry of "errors" is indexed with "file" and
            "error".

    Raises:
        KeyError: If one of the keys listed above is missing.
    """
    files_processed = stats["files_processed"]
    chunks_created = stats["chunks_created"]
    errors = stats["errors"]
    # Free-form values are escaped so square brackets in a collection name,
    # path or error message are shown literally rather than parsed as markup.
    collection = escape(str(stats["collection_name"]))

    # Success summary
    console.print(Panel(
        f"[green]✓[/green] Indexing completed\n\n"
        f"Files processed: [cyan]{files_processed}[/cyan]\n"
        f"Chunks created: [cyan]{chunks_created}[/cyan]\n"
        f"Collection: [cyan]{collection}[/cyan]",
        title="[bold]Indexing Results[/bold]",
        border_style="green",
    ))

    # Display errors if any
    if errors:
        console.print(f"\n[yellow]⚠ {len(errors)} file(s) skipped due to errors:[/yellow]\n")
        for error in errors:
            failed_file = escape(str(error["file"]))
            reason = escape(str(error["error"]))
            console.print(f" [red]•[/red] {failed_file}: [dim]{reason}[/dim]")
def _display_results_compact(results: list[SearchResult]):
    """
    Display search results in compact format.

    Each result is printed as a numbered header (file path, score and star
    rating), a line with the section title and line range, and the result
    text truncated to 200 characters.

    Args:
        results: List of SearchResult objects
    """
    for idx, result in enumerate(results, 1):
        # Format score as stars (0-5 scale). The count is clamped so an
        # out-of-range score cannot produce more than five stars.
        star_count = max(0, min(5, int(result.score * 5)))
        stars = "★" * star_count

        # Values taken from notes are escaped before being mixed into markup:
        # markdown such as "[[wikilink]]" or "[text](url)" would otherwise be
        # read as rich style tags and disappear from the output.
        file_path = escape(str(result.file_path))
        section_title = escape(str(result.section_title))

        console.print(f"[bold cyan]{idx}.[/bold cyan] {file_path} [dim](score: {result.score:.2f} {stars})[/dim]")
        console.print(
            f" Section: [yellow]{section_title}[/yellow] | Lines: [dim]{result.line_start}-{result.line_end}[/dim]")

        # Truncate text if too long (before escaping, so an escape sequence
        # is never cut in half).
        text = result.text
        if len(text) > 200:
            text = text[:200] + "..."
        console.print(f" {escape(text)}\n")
def _display_results_panel(results: list[SearchResult]):
    """
    Render search results for the "panel" output format.

    Panel rendering is not implemented yet: a notice is printed and the
    results are shown through the compact renderer instead.

    Args:
        results: List of SearchResult objects
    """
    # TODO: Implement panel format in future
    fallback_notice = "[yellow]Panel format not yet implemented. Using compact format.[/yellow]\n"
    console.print(fallback_notice)
    _display_results_compact(results)
def _display_results_table(results: list[SearchResult]):
    """
    Render search results for the "table" output format.

    Table rendering is not implemented yet: a notice is printed and the
    results are shown through the compact renderer instead.

    Args:
        results: List of SearchResult objects
    """
    # TODO: Implement table format in future
    fallback_notice = "[yellow]Table format not yet implemented. Using compact format.[/yellow]\n"
    console.print(fallback_notice)
    _display_results_compact(results)
def main():
    """Run the Typer application; this is the CLI entry point."""
    app()


# Allow running the module directly as a script.
if __name__ == "__main__":
    main()