404 lines
11 KiB
Python
404 lines
11 KiB
Python
"""
|
|
CLI module for Obsidian RAG Backend.
|
|
|
|
Provides a command-line interface for indexing the Obsidian vault, searching it, and asking questions about it.
|
|
"""
|
|
import os
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
import typer
|
|
from rich.console import Console
|
|
from rich.panel import Panel
|
|
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn
|
|
|
|
from indexer import index_vault
|
|
from rag_chain import RAGChain
|
|
from searcher import search_vault, SearchResult
|
|
|
|
# Typer application object; the command functions in this module register on it.
app = typer.Typer(
    add_completion=False,
    help="Local semantic search backend for Obsidian markdown files",
    name="obsidian-rag",
)

# Shared Rich console used for all terminal output in this module.
console = Console()

# Default ChromaDB path
DEFAULT_CHROMA_PATH = Path.home().joinpath(".obsidian_rag", "chroma_db")
|
|
|
|
|
|
def _truncate_path(path: str, max_len: int = 60) -> str:
|
|
"""Return a truncated version of the file path if too long."""
|
|
if len(path) <= max_len:
|
|
return path
|
|
return "..." + path[-(max_len - 3):]
|
|
|
|
|
|
@app.command()
def index(
    vault_path: str = typer.Argument(
        ...,
        help="Path to the Obsidian vault directory",
    ),
    chroma_path: Optional[str] = typer.Option(
        None,
        "--chroma-path",
        "-c",
        help=f"Path to ChromaDB storage (default: {DEFAULT_CHROMA_PATH})",
    ),
    collection_name: str = typer.Option(
        "obsidian_vault",
        "--collection",
        help="Name of the ChromaDB collection",
    ),
    max_chunk_tokens: int = typer.Option(
        200,
        "--max-tokens",
        help="Maximum tokens per chunk",
    ),
    overlap_tokens: int = typer.Option(
        30,
        "--overlap",
        help="Number of overlapping tokens between chunks",
    ),
):
    """
    Index all markdown files from the Obsidian vault into ChromaDB.
    """
    # NOTE: the docstring above is displayed by Typer as the command help,
    # so implementation notes are kept in comments.
    #
    # Flow: validate the vault directory, make sure the ChromaDB directory
    # exists, count the markdown files, run index_vault() under a progress
    # bar, then print a summary. Exits with code 1 on invalid input or an
    # indexing failure, and with code 0 when the vault has no markdown files.
    vault_path_obj = Path(vault_path)
    chroma_path_obj = Path(chroma_path) if chroma_path else DEFAULT_CHROMA_PATH

    if not vault_path_obj.exists():
        console.print(f"[red]✗ Error:[/red] Vault path does not exist: {vault_path}")
        raise typer.Exit(code=1)

    if not vault_path_obj.is_dir():
        console.print(f"[red]✗ Error:[/red] Vault path is not a directory: {vault_path}")
        raise typer.Exit(code=1)

    chroma_path_obj.mkdir(parents=True, exist_ok=True)

    # Only the count is needed here; it sizes the progress bar.
    md_files = list(vault_path_obj.rglob("*.md"))
    total_files = len(md_files)

    if total_files == 0:
        console.print(f"[yellow]⚠ Warning:[/yellow] No markdown files found in {vault_path}")
        raise typer.Exit(code=0)

    console.print(f"\n[cyan]Found {total_files} markdown files to index[/cyan]\n")

    # One single stable progress bar. The per-file status is shown as an
    # extra column of this bar rather than as a separate console.status():
    # Progress and Status are both live displays, and starting a second one
    # while the first is active fails on Rich versions that permit only one
    # active live display.
    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TaskProgressColumn(),
        # Markup is disabled so file names containing square brackets are
        # rendered verbatim.
        TextColumn("{task.fields[status]}", style="dim", markup=False),
        console=console,
    ) as progress:
        main_task = progress.add_task(
            "[cyan]Indexing vault...",
            total=total_files,
            status="Preparing first file...",
        )

        def progress_callback(current_file: str, files_processed: int, total: int):
            """Update progress bar and status message."""
            progress.update(
                main_task,
                completed=files_processed,
                status=f"Processing: {_truncate_path(current_file)}",
            )

        try:
            stats = index_vault(
                vault_path=str(vault_path_obj),
                chroma_db_path=str(chroma_path_obj),
                collection_name=collection_name,
                max_chunk_tokens=max_chunk_tokens,
                overlap_tokens=overlap_tokens,
                progress_callback=progress_callback,
            )
        except Exception as e:
            console.print(f"\n[red]✗ Error during indexing:[/red] {str(e)}")
            raise typer.Exit(code=1) from e

        # Force the bar to 100% in case the callback was not invoked for
        # the final file, and replace the description with the done marker.
        progress.update(
            main_task,
            completed=total_files,
            description="[green]✓ Completed",
            status="",
        )

    console.print()
    _display_index_results(stats)
|
|
|
|
|
|
@app.command()
def search(
    query: str = typer.Argument(
        ...,
        help="Search query",
    ),
    chroma_path: Optional[str] = typer.Option(
        None,
        "--chroma-path",
        "-c",
        help=f"Path to ChromaDB storage (default: {DEFAULT_CHROMA_PATH})",
    ),
    collection_name: str = typer.Option(
        "obsidian_vault",
        "--collection",
        help="Name of the ChromaDB collection",
    ),
    limit: int = typer.Option(
        5,
        "--limit",
        "-l",
        help="Maximum number of results to return",
    ),
    min_score: float = typer.Option(
        0.0,
        "--min-score",
        "-s",
        help="Minimum similarity score (0.0 to 1.0)",
    ),
    format: str = typer.Option(
        "compact",
        "--format",
        "-f",
        help="Output format: compact (default), panel, table",
    ),
):
    """
    Search the indexed vault for semantically similar content.

    Returns relevant sections from your Obsidian notes based on
    semantic similarity to the query.
    """
    # The docstring above doubles as the CLI help text, so it is kept as is.

    # Use the default store location unless one was given explicitly.
    store_dir = DEFAULT_CHROMA_PATH if not chroma_path else Path(chroma_path)

    # The index must already exist on disk.
    if not store_dir.exists():
        console.print(
            f"[red]✗ Error:[/red] ChromaDB not found at {store_dir}\n"
            f"Please run 'obsidian-rag index <vault_path>' first to create the index."
        )
        raise typer.Exit(code=1)

    # Output format name -> renderer; the keys are also the valid options.
    renderers = {
        "compact": _display_results_compact,
        "panel": _display_results_panel,
        "table": _display_results_table,
    }
    if format not in renderers:
        console.print(f"[red]✗ Error:[/red] Invalid format '{format}'. Valid options: {', '.join(renderers)}")
        raise typer.Exit(code=1)

    # Run the query while showing a spinner.
    try:
        with console.status("[cyan]Searching...", spinner="dots"):
            results = search_vault(
                query=query,
                chroma_db_path=str(store_dir),
                collection_name=collection_name,
                limit=limit,
                min_score=min_score,
            )
    except ValueError as err:
        console.print(f"[red]✗ Error:[/red] {str(err)}")
        raise typer.Exit(code=1)
    except Exception as err:
        console.print(f"[red]✗ Unexpected error:[/red] {str(err)}")
        raise typer.Exit(code=1)

    # Nothing matched: explain and exit successfully.
    if not results:
        console.print(f"\n[yellow]No results found for query:[/yellow] '{query}'")
        if min_score > 0:
            console.print(f"[dim]Try lowering --min-score (currently {min_score})[/dim]")
        raise typer.Exit(code=0)

    console.print(f"\n[cyan]Found {len(results)} result(s) for:[/cyan] '{query}'\n")

    # Render with the renderer chosen by --format.
    renderers[format](results)
|
|
|
|
|
|
@app.command()
def ask(
    query: str = typer.Argument(
        ...,
        help="Question to ask the LLM based on your Obsidian notes."
    ),
    chroma_path: Optional[str] = typer.Option(
        None,
        "--chroma-path",
        "-c",
        help=f"Path to ChromaDB storage (default: {DEFAULT_CHROMA_PATH})",
    ),
    collection_name: str = typer.Option(
        "obsidian_vault",
        "--collection",
        help="Name of the ChromaDB collection",
    ),
    top_k: int = typer.Option(
        5,
        "--top-k",
        "-k",
        help="Number of top chunks to use for context",
    ),
    min_score: float = typer.Option(
        0.0,
        "--min-score",
        "-s",
        help="Minimum similarity score for chunks",
    ),
    api_key: Optional[str] = typer.Option(
        None,
        "--api-key",
        help="Clovis API key (or set CLOVIS_API_KEY environment variable)",
    ),
    base_url: Optional[str] = typer.Option(
        None,
        "--base-url",
        help="Clovis base URL (or set CLOVIS_BASE_URL environment variable)",
    ),
):
    """
    Ask a question to the LLM using RAG over your Obsidian vault.
    """
    # NOTE: the docstring above is displayed by Typer as the command help,
    # so implementation notes are kept in comments.
    #
    # Flow: check that the ChromaDB directory exists, resolve the API
    # credentials (option first, then environment), build a RAGChain, ask
    # the question, then print the answer and the chunks it was based on.
    # Exits with code 1 on a missing index, missing credentials, or any
    # error raised while building or querying the chain.
    from rich.markup import escape

    # Resolve ChromaDB path
    chroma_path_obj = Path(chroma_path) if chroma_path else DEFAULT_CHROMA_PATH
    if not chroma_path_obj.exists():
        console.print(
            f"[red]✗ Error:[/red] ChromaDB not found at {chroma_path_obj}\n"
            f"Please run 'obsidian-rag index <vault_path>' first to create the index."
        )
        raise typer.Exit(code=1)

    # Resolve API key and base URL
    api_key = api_key or os.getenv("CLOVIS_API_KEY")
    base_url = base_url or os.getenv("CLOVIS_BASE_URL")
    if not api_key or not base_url:
        console.print(
            "[red]✗ Error:[/red] API key or base URL not provided.\n"
            "Set them via --api-key / --base-url or environment variables CLOVIS_API_KEY and CLOVIS_BASE_URL."
        )
        raise typer.Exit(code=1)

    # Build the chain and query it inside one try block so that a failure
    # while constructing RAGChain is reported like a query failure instead
    # of surfacing as a raw traceback.
    try:
        rag = RAGChain(
            chroma_db_path=str(chroma_path_obj),
            collection_name=collection_name,
            top_k=top_k,
            min_score=min_score,
            api_key=api_key,
            base_url=base_url,
        )
        with console.status("[cyan]Querying LLM...", spinner="dots"):
            answer, used_chunks = rag.answer_query(query)
    except Exception as e:
        console.print(f"[red]✗ Error:[/red] {escape(str(e))}")
        raise typer.Exit(code=1) from e

    # Display answer. The text is printed with markup and emoji codes
    # disabled: square brackets such as [[wikilinks]] or [text](url) would
    # otherwise be parsed as Rich markup tags and be dropped or raise.
    console.print("\n[bold green]Answer:[/bold green]\n")
    console.print(answer + "\n", markup=False, emoji=False)

    # Display sources used (paths are escaped for the same reason)
    if used_chunks:
        sources = ", ".join(f"{c.file_path}#L{c.line_start}-L{c.line_end}" for c in used_chunks)
        console.print(f"[bold cyan]Sources:[/bold cyan] {escape(sources)}\n")
    else:
        console.print("[bold cyan]Sources:[/bold cyan] None\n")
|
|
|
|
|
|
def _display_index_results(stats: dict):
    """
    Display indexing results with rich formatting.

    Prints a summary panel and, when any were recorded, a list of the
    files that were skipped because of errors.

    Args:
        stats: Statistics dictionary from index_vault. The keys read here
            are "files_processed", "chunks_created", "collection_name" and
            "errors"; each entry of "errors" is indexed with "file" and
            "error".
    """
    # Values that originate from user input or the file system are escaped
    # so that square brackets in them are printed literally instead of
    # being parsed as Rich markup tags.
    from rich.markup import escape

    files_processed = stats["files_processed"]
    chunks_created = stats["chunks_created"]
    errors = stats["errors"]
    collection = escape(str(stats["collection_name"]))

    # Success summary
    console.print(Panel(
        f"[green]✓[/green] Indexing completed\n\n"
        f"Files processed: [cyan]{files_processed}[/cyan]\n"
        f"Chunks created: [cyan]{chunks_created}[/cyan]\n"
        f"Collection: [cyan]{collection}[/cyan]",
        title="[bold]Indexing Results[/bold]",
        border_style="green",
    ))

    # Display errors if any
    if errors:
        console.print(f"\n[yellow]⚠ {len(errors)} file(s) skipped due to errors:[/yellow]\n")
        for error in errors:
            file_name = escape(str(error["file"]))
            message = escape(str(error["error"]))
            console.print(f"  [red]•[/red] {file_name}: [dim]{message}[/dim]")
|
|
|
|
|
|
def _display_results_compact(results: list[SearchResult]):
    """
    Display search results in compact format.

    For each result this prints a numbered header line (file path, score
    and star rating), a line with the section title and line range, and
    the first 200 characters of the matched text.

    Args:
        results: List of SearchResult objects
    """
    # Note content, titles and paths are escaped before being embedded in
    # markup: Obsidian text is full of [[wikilinks]] and [text](url), which
    # Rich would otherwise parse as markup tags and drop or reject.
    from rich.markup import escape

    for idx, result in enumerate(results, 1):
        # Format score as stars, clamped to the 0-5 scale so an
        # out-of-range score cannot print more than five stars.
        star_count = max(0, min(5, int(result.score * 5)))
        stars = "⭐" * star_count

        file_path = escape(str(result.file_path))
        section_title = escape(str(result.section_title))

        console.print(f"[bold cyan]{idx}.[/bold cyan] {file_path} [dim](score: {result.score:.2f} {stars})[/dim]")
        console.print(
            f"   Section: [yellow]{section_title}[/yellow] | Lines: [dim]{result.line_start}-{result.line_end}[/dim]")

        # Truncate text if too long (before escaping, so an escape
        # sequence is never cut in half)
        text = result.text
        if len(text) > 200:
            text = text[:200] + "..."

        console.print(f"   {escape(text)}\n")
|
|
|
|
|
|
def _display_results_panel(results: list[SearchResult]):
    """
    Show search results for the "panel" output format.

    The panel layout does not exist yet, so a notice is printed and the
    results are rendered with the compact layout.

    Args:
        results: List of SearchResult objects
    """
    # TODO: Implement panel format in future
    notice = "[yellow]Panel format not yet implemented. Using compact format.[/yellow]\n"
    console.print(notice)
    _display_results_compact(results)
|
|
|
|
|
|
def _display_results_table(results: list[SearchResult]):
    """
    Show search results for the "table" output format.

    The table layout does not exist yet, so a notice is printed and the
    results are rendered with the compact layout.

    Args:
        results: List of SearchResult objects
    """
    # TODO: Implement table format in future
    notice = "[yellow]Table format not yet implemented. Using compact format.[/yellow]\n"
    console.print(notice)
    _display_results_compact(results)
|
|
|
|
|
|
def main():
    """Launch the CLI by invoking the Typer application object."""
    app()


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
|