Initial commit
This commit is contained in:
403
obsidian_rag/cli.py
Normal file
403
obsidian_rag/cli.py
Normal file
@@ -0,0 +1,403 @@
|
||||
"""
|
||||
CLI module for Obsidian RAG Backend.
|
||||
|
||||
Provides command-line interface for indexing and searching the Obsidian vault.
|
||||
"""
|
||||
import os
from pathlib import Path
from typing import Optional

import typer
from rich.console import Console
from rich.markup import escape
from rich.panel import Panel
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn

from indexer import index_vault
from rag_chain import RAGChain
from searcher import search_vault, SearchResult
|
||||
|
||||
# Default on-disk location of the ChromaDB store, under the user's home directory.
DEFAULT_CHROMA_PATH = Path.home().joinpath(".obsidian_rag", "chroma_db")

# Shared Rich console; every command and display helper in this module prints through it.
console = Console()

# Typer application object; the @app.command() functions below register on it.
app = typer.Typer(
    name="obsidian-rag",
    help="Local semantic search backend for Obsidian markdown files",
    add_completion=False,
)
|
||||
|
||||
|
||||
def _truncate_path(path: str, max_len: int = 60) -> str:
|
||||
"""Return a truncated version of the file path if too long."""
|
||||
if len(path) <= max_len:
|
||||
return path
|
||||
return "..." + path[-(max_len - 3):]
|
||||
|
||||
|
||||
# NOTE: Typer shows the function docstring as the command's --help text, so it
# is kept short; implementation notes live in comments instead.
@app.command()
def index(
    vault_path: str = typer.Argument(
        ...,
        help="Path to the Obsidian vault directory",
    ),
    chroma_path: Optional[str] = typer.Option(
        None,
        "--chroma-path",
        "-c",
        help=f"Path to ChromaDB storage (default: {DEFAULT_CHROMA_PATH})",
    ),
    collection_name: str = typer.Option(
        "obsidian_vault",
        "--collection",
        help="Name of the ChromaDB collection",
    ),
    max_chunk_tokens: int = typer.Option(
        200,
        "--max-tokens",
        help="Maximum tokens per chunk",
    ),
    overlap_tokens: int = typer.Option(
        30,
        "--overlap",
        help="Number of overlapping tokens between chunks",
    ),
):
    """
    Index all markdown files from the Obsidian vault into ChromaDB.
    """
    vault_dir = Path(vault_path)
    chroma_dir = Path(chroma_path) if chroma_path else DEFAULT_CHROMA_PATH

    # User-supplied paths are escaped before being embedded in Rich markup so
    # that square brackets in directory names are printed literally.
    shown_vault = escape(vault_path)

    if not vault_dir.exists():
        console.print(f"[red]✗ Error:[/red] Vault path does not exist: {shown_vault}")
        raise typer.Exit(code=1)

    if not vault_dir.is_dir():
        console.print(f"[red]✗ Error:[/red] Vault path is not a directory: {shown_vault}")
        raise typer.Exit(code=1)

    chroma_dir.mkdir(parents=True, exist_ok=True)

    # Only the count is needed here (it sizes the progress bar).
    total_files = sum(1 for _ in vault_dir.rglob("*.md"))

    if total_files == 0:
        console.print(f"[yellow]⚠ Warning:[/yellow] No markdown files found in {shown_vault}")
        raise typer.Exit(code=0)

    console.print(f"\n[cyan]Found {total_files} markdown files to index[/cyan]\n")

    # A single live display: the per-file status is an extra column of the
    # progress bar rather than a separate console.status(). Rich releases that
    # allow only one active live display raise LiveError when a status spinner
    # is started inside a running Progress. markup=False keeps file names that
    # contain square brackets intact.
    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TaskProgressColumn(),
        TextColumn("{task.fields[status]}", style="dim", markup=False),
        console=console,
    ) as progress:
        main_task = progress.add_task(
            "[cyan]Indexing vault...",
            total=total_files,
            status="Preparing first file...",
        )

        def progress_callback(current_file: str, files_processed: int, total: int):
            """Advance the bar and show the file currently being processed."""
            progress.update(
                main_task,
                completed=files_processed,
                status=f"Processing: {_truncate_path(str(current_file))}",
            )

        try:
            stats = index_vault(
                vault_path=str(vault_dir),
                chroma_db_path=str(chroma_dir),
                collection_name=collection_name,
                max_chunk_tokens=max_chunk_tokens,
                overlap_tokens=overlap_tokens,
                progress_callback=progress_callback,
            )
        except Exception as e:
            # Top-level CLI boundary: report the failure and exit non-zero.
            console.print(f"\n[red]✗ Error during indexing:[/red] {escape(str(e))}")
            raise typer.Exit(code=1) from e

        progress.update(main_task, completed=total_files, status="✓ Completed")

    console.print()
    _display_index_results(stats)
|
||||
|
||||
|
||||
# NOTE: Typer shows the function docstring as the command's --help text, so it
# is left as user-facing prose; implementation notes live in comments instead.
@app.command()
def search(
    query: str = typer.Argument(
        ...,
        help="Search query",
    ),
    chroma_path: Optional[str] = typer.Option(
        None,
        "--chroma-path",
        "-c",
        help=f"Path to ChromaDB storage (default: {DEFAULT_CHROMA_PATH})",
    ),
    collection_name: str = typer.Option(
        "obsidian_vault",
        "--collection",
        help="Name of the ChromaDB collection",
    ),
    limit: int = typer.Option(
        5,
        "--limit",
        "-l",
        help="Maximum number of results to return",
    ),
    min_score: float = typer.Option(
        0.0,
        "--min-score",
        "-s",
        help="Minimum similarity score (0.0 to 1.0)",
    ),
    format: str = typer.Option(
        "compact",
        "--format",
        "-f",
        help="Output format: compact (default), panel, table",
    ),
):
    """
    Search the indexed vault for semantically similar content.

    Returns relevant sections from your Obsidian notes based on
    semantic similarity to the query.
    """
    chroma_dir = Path(chroma_path) if chroma_path else DEFAULT_CHROMA_PATH

    # The index must already exist; searching never creates it.
    if not chroma_dir.exists():
        console.print(
            f"[red]✗ Error:[/red] ChromaDB not found at {escape(str(chroma_dir))}\n"
            f"Please run 'obsidian-rag index <vault_path>' first to create the index."
        )
        raise typer.Exit(code=1)

    # Output format name -> renderer. Insertion order is the order shown in
    # the "Valid options" error message.
    renderers = {
        "compact": _display_results_compact,
        "panel": _display_results_panel,
        "table": _display_results_table,
    }
    if format not in renderers:
        console.print(
            f"[red]✗ Error:[/red] Invalid format '{escape(format)}'. "
            f"Valid options: {', '.join(renderers)}"
        )
        raise typer.Exit(code=1)

    try:
        with console.status("[cyan]Searching...", spinner="dots"):
            results = search_vault(
                query=query,
                chroma_db_path=str(chroma_dir),
                collection_name=collection_name,
                limit=limit,
                min_score=min_score,
            )
    except ValueError as e:
        console.print(f"[red]✗ Error:[/red] {escape(str(e))}")
        raise typer.Exit(code=1) from e
    except Exception as e:
        # Top-level CLI boundary: anything else is reported as unexpected.
        console.print(f"[red]✗ Unexpected error:[/red] {escape(str(e))}")
        raise typer.Exit(code=1) from e

    # The query is user text: escape it so square brackets (e.g. "[[wikilink]]"
    # or "[/x]") are echoed literally instead of being parsed as Rich markup.
    shown_query = escape(query)

    if not results:
        console.print(f"\n[yellow]No results found for query:[/yellow] '{shown_query}'")
        if min_score > 0:
            console.print(f"[dim]Try lowering --min-score (currently {min_score})[/dim]")
        raise typer.Exit(code=0)

    console.print(f"\n[cyan]Found {len(results)} result(s) for:[/cyan] '{shown_query}'\n")
    renderers[format](results)
|
||||
|
||||
|
||||
# NOTE: Typer shows the function docstring as the command's --help text, so it
# is kept short; implementation notes live in comments instead.
@app.command()
def ask(
    query: str = typer.Argument(
        ...,
        help="Question to ask the LLM based on your Obsidian notes."
    ),
    chroma_path: Optional[str] = typer.Option(
        None,
        "--chroma-path",
        "-c",
        help=f"Path to ChromaDB storage (default: {DEFAULT_CHROMA_PATH})",
    ),
    collection_name: str = typer.Option(
        "obsidian_vault",
        "--collection",
        help="Name of the ChromaDB collection",
    ),
    top_k: int = typer.Option(
        5,
        "--top-k",
        "-k",
        help="Number of top chunks to use for context",
    ),
    min_score: float = typer.Option(
        0.0,
        "--min-score",
        "-s",
        help="Minimum similarity score for chunks",
    ),
    api_key: Optional[str] = typer.Option(
        None,
        "--api-key",
        help="Clovis API key (or set CLOVIS_API_KEY environment variable)",
    ),
    base_url: Optional[str] = typer.Option(
        None,
        "--base-url",
        help="Clovis base URL (or set CLOVIS_BASE_URL environment variable)",
    ),
):
    """
    Ask a question to the LLM using RAG over your Obsidian vault.
    """
    chroma_dir = Path(chroma_path) if chroma_path else DEFAULT_CHROMA_PATH
    if not chroma_dir.exists():
        console.print(
            f"[red]✗ Error:[/red] ChromaDB not found at {escape(str(chroma_dir))}\n"
            f"Please run 'obsidian-rag index <vault_path>' first to create the index."
        )
        raise typer.Exit(code=1)

    # Command-line options take precedence over the environment variables.
    api_key = api_key or os.getenv("CLOVIS_API_KEY")
    base_url = base_url or os.getenv("CLOVIS_BASE_URL")
    if not api_key or not base_url:
        console.print(
            "[red]✗ Error:[/red] API key or base URL not provided.\n"
            "Set them via --api-key / --base-url or environment variables CLOVIS_API_KEY and CLOVIS_BASE_URL."
        )
        raise typer.Exit(code=1)

    try:
        # Building the chain is inside the try so that a failure while
        # constructing it is reported the same way as a failed query.
        rag = RAGChain(
            chroma_db_path=str(chroma_dir),
            collection_name=collection_name,
            top_k=top_k,
            min_score=min_score,
            api_key=api_key,
            base_url=base_url,
        )
        with console.status("[cyan]Querying LLM...", spinner="dots"):
            answer, used_chunks = rag.answer_query(query)
    except Exception as e:
        # Top-level CLI boundary: report the failure and exit non-zero.
        console.print(f"[red]✗ Error:[/red] {escape(str(e))}")
        raise typer.Exit(code=1) from e

    console.print("\n[bold green]Answer:[/bold green]\n")
    # The answer is free text from the model; print it without markup parsing
    # so bracketed text such as "[link](url)" or "[x]" is shown as written.
    console.print(f"{answer}\n", markup=False)

    if used_chunks:
        sources = ", ".join(
            f"{chunk.file_path}#L{chunk.line_start}-L{chunk.line_end}"
            for chunk in used_chunks
        )
        # File paths may contain square brackets; escape before embedding.
        console.print(f"[bold cyan]Sources:[/bold cyan] {escape(sources)}\n")
    else:
        console.print("[bold cyan]Sources:[/bold cyan] None\n")
|
||||
|
||||
|
||||
def _display_index_results(stats: dict):
    """
    Display indexing results with rich formatting.

    Prints a summary panel and, when any were recorded, a list of the files
    that were skipped together with their error messages.

    Args:
        stats: Statistics dictionary from index_vault. The keys read here are
            "files_processed", "chunks_created", "errors" and
            "collection_name"; each entry of "errors" is indexed with
            "file" and "error".

    Raises:
        KeyError: If one of the keys listed above is missing.
    """
    files_processed = stats["files_processed"]
    chunks_created = stats["chunks_created"]
    errors = stats["errors"]
    # Values that originate outside this module are escaped so that square
    # brackets in them are printed literally instead of parsed as markup.
    collection = escape(str(stats["collection_name"]))

    # Success summary
    console.print(Panel(
        f"[green]✓[/green] Indexing completed\n\n"
        f"Files processed: [cyan]{files_processed}[/cyan]\n"
        f"Chunks created: [cyan]{chunks_created}[/cyan]\n"
        f"Collection: [cyan]{collection}[/cyan]",
        title="[bold]Indexing Results[/bold]",
        border_style="green",
    ))

    if not errors:
        return

    console.print(f"\n[yellow]⚠ {len(errors)} file(s) skipped due to errors:[/yellow]\n")
    for error in errors:
        failed_file = escape(str(error["file"]))
        reason = escape(str(error["error"]))
        console.print(f"  [red]•[/red] {failed_file}: [dim]{reason}[/dim]")
|
||||
|
||||
|
||||
def _display_results_compact(results: list[SearchResult]):
    """
    Display search results in compact format.

    For every result this prints a numbered header (file path, score and a
    star rating), the section title with its line range, and a preview of the
    text limited to 200 characters.

    Args:
        results: List of SearchResult objects
    """
    max_stars = 5
    preview_len = 200

    for idx, result in enumerate(results, 1):
        # Star rating on a 0-5 scale; clamped so scores outside [0, 1]
        # cannot produce more than five stars.
        star_count = max(0, min(max_stars, int(result.score * max_stars)))
        stars = "⭐" * star_count

        # Note content, titles and paths routinely contain square brackets
        # (checkboxes, links, wikilinks). Escape them so Rich prints them
        # literally instead of treating them as markup tags.
        file_path = escape(str(result.file_path))
        section_title = escape(str(result.section_title))

        console.print(
            f"[bold cyan]{idx}.[/bold cyan] {file_path} "
            f"[dim](score: {result.score:.2f} {stars})[/dim]"
        )
        console.print(
            f"   Section: [yellow]{section_title}[/yellow] | "
            f"Lines: [dim]{result.line_start}-{result.line_end}[/dim]"
        )

        # Truncate first, then escape, so an escape sequence is never cut.
        text = result.text
        if len(text) > preview_len:
            text = text[:preview_len] + "..."

        console.print(f"   {escape(text)}\n")
|
||||
|
||||
|
||||
def _display_results_panel(results: list[SearchResult]):
    """
    Placeholder for the panel (rich boxes) output format.

    Prints a notice that the format is not implemented yet and then renders
    the results with the compact format.

    Args:
        results: List of SearchResult objects
    """
    # TODO: Implement panel format in future
    notice = "[yellow]Panel format not yet implemented. Using compact format.[/yellow]\n"
    console.print(notice)
    _display_results_compact(results)
|
||||
|
||||
|
||||
def _display_results_table(results: list[SearchResult]):
    """
    Placeholder for the table output format.

    Prints a notice that the format is not implemented yet and then renders
    the results with the compact format.

    Args:
        results: List of SearchResult objects
    """
    # TODO: Implement table format in future
    notice = "[yellow]Table format not yet implemented. Using compact format.[/yellow]\n"
    console.print(notice)
    _display_results_compact(results)
|
||||
|
||||
|
||||
def main():
    """Run the CLI by invoking the module-level Typer application."""
    app()
|
||||
|
||||
|
||||
# Start the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user