# MyObsidianAI/tests/test_cli.py

"""
Unit tests for the CLI module.
"""
import pytest
from pathlib import Path
from typer.testing import CliRunner
from obsidian_rag.cli import app, _display_index_results, _display_results_compact
from obsidian_rag.indexer import index_vault
from obsidian_rag.searcher import SearchResult

runner = CliRunner()


@pytest.fixture
def temp_vault(tmp_path):
  """
  Build a throwaway vault directory holding three small markdown notes.

  Returns the path of the vault directory (``tmp_path / "test_vault"``).
  """
  # File name -> file content, written out verbatim below.
  notes = {
      "python.md": """# Python Programming

Python is a high-level programming language.

## Features

Python has dynamic typing and automatic memory management.
""",
      "javascript.md": """# JavaScript

JavaScript is a scripting language for web development.

## Usage

JavaScript runs in web browsers and Node.js environments.
""",
      "cooking.md": """# Cooking Tips

Learn how to cook delicious meals.

## Basics

Start with simple recipes and basic techniques.
""",
  }

  vault_dir = tmp_path / "test_vault"
  vault_dir.mkdir()

  # Dicts keep insertion order, so files are created in declaration order.
  for note_name, note_body in notes.items():
    (vault_dir / note_name).write_text(note_body)

  return vault_dir


# Tests for 'index' command - Passing tests


def test_i_can_index_vault_successfully(temp_vault, tmp_path):
  """
  Index the sample vault and check the summary printed by the command.
  """
  db_dir = tmp_path / "chroma_db"
  cli_args = ["index", str(temp_vault), "--chroma-path", str(db_dir)]

  outcome = runner.invoke(app, cli_args)

  assert outcome.exit_code == 0

  # The fixture vault holds exactly three markdown files.
  expected_fragments = (
      "Found 3 markdown files to index",
      "Indexing completed",
      "Files processed:",
      "Chunks created:",
  )
  for fragment in expected_fragments:
    assert fragment in outcome.stdout


def test_i_can_index_with_custom_chroma_path(temp_vault, tmp_path):
  """
  Index into a non-default ChromaDB directory and check it gets populated.
  """
  db_dir = tmp_path / "my_custom_db"
  cli_args = ["index", str(temp_vault), "--chroma-path", str(db_dir)]

  outcome = runner.invoke(app, cli_args)

  assert outcome.exit_code == 0
  assert db_dir.exists()

  # presumably the SQLite file ChromaDB's persistent store creates -- confirm
  sqlite_file = db_dir / "chroma.sqlite3"
  assert sqlite_file.exists()


def test_i_can_index_with_custom_collection_name(temp_vault, tmp_path):
  """
  Index with ``--collection`` and check the name is echoed in the output.
  """
  target_collection = "my_custom_collection"
  db_dir = tmp_path / "chroma_db"

  cli_args = ["index", str(temp_vault)]
  cli_args += ["--chroma-path", str(db_dir)]
  cli_args += ["--collection", target_collection]

  outcome = runner.invoke(app, cli_args)

  assert outcome.exit_code == 0
  expected_line = f"Collection: {target_collection}"
  assert expected_line in outcome.stdout


def test_i_can_see_errors_in_index_results(tmp_path):
  """
  Index a vault containing one readable note and one undecodable file.

  Only checks that the command still finishes successfully; it does not
  assert on how (or whether) the bad file is reported.
  """
  vault_dir = tmp_path / "vault_with_errors"
  vault_dir.mkdir()

  # A well-formed note next to a file whose bytes are not valid UTF-8.
  (vault_dir / "valid.md").write_text("# Valid File\n\nThis is valid content.")
  (vault_dir / "invalid.md").write_bytes(b"\xff\xfe\x00\x00")

  db_dir = tmp_path / "chroma_db"
  outcome = runner.invoke(
      app,
      ["index", str(vault_dir), "--chroma-path", str(db_dir)],
  )

  # The run is expected to finish with exit code 0 despite the bad file.
  assert outcome.exit_code == 0
  assert "Indexing completed" in outcome.stdout


# Tests for 'index' command - Error tests


def test_i_cannot_index_nonexistent_vault(tmp_path):
  """
  Point the index command at a missing directory and expect a clear failure.
  """
  missing_vault = tmp_path / "does_not_exist"
  db_dir = tmp_path / "chroma_db"
  cli_args = ["index", str(missing_vault), "--chroma-path", str(db_dir)]

  outcome = runner.invoke(app, cli_args)

  # Failure is signalled by exit code 1 plus an explanatory message.
  assert outcome.exit_code == 1
  assert "does not exist" in outcome.stdout


def test_i_cannot_index_file_instead_of_directory(tmp_path):
  """
  Pass a regular file as the vault argument and expect the command to refuse.
  """
  db_dir = tmp_path / "chroma_db"
  plain_file = tmp_path / "somefile.txt"
  plain_file.write_text("I am a file")

  cli_args = ["index", str(plain_file), "--chroma-path", str(db_dir)]
  outcome = runner.invoke(app, cli_args)

  assert outcome.exit_code == 1
  assert "not a directory" in outcome.stdout


def test_i_can_handle_empty_vault_gracefully(tmp_path):
  """
  Index a vault without any .md file and expect a friendly message, not a failure.
  """
  vault_dir = tmp_path / "empty_vault"
  vault_dir.mkdir()

  # The only file present is not markdown.
  other_file = vault_dir / "readme.txt"
  other_file.write_text("Not a markdown file")

  db_dir = tmp_path / "chroma_db"
  cli_args = ["index", str(vault_dir), "--chroma-path", str(db_dir)]
  outcome = runner.invoke(app, cli_args)

  assert outcome.exit_code == 0
  assert "No markdown files found" in outcome.stdout


# Tests for 'search' command - Passing tests


def test_i_can_search_indexed_vault(temp_vault, tmp_path):
  """
  Index the sample vault, then run a search and check the matching note is listed.
  """
  db_dir = tmp_path / "chroma_db"
  db_option = ["--chroma-path", str(db_dir)]

  # Step 1: build the index the search will read from.
  indexing = runner.invoke(app, ["index", str(temp_vault), *db_option])
  assert indexing.exit_code == 0

  # Step 2: query it.
  searching = runner.invoke(app, ["search", "Python programming", *db_option])
  assert searching.exit_code == 0

  for fragment in ("Found", "result(s) for:", "python.md"):
    assert fragment in searching.stdout


def test_i_can_search_with_limit_option(temp_vault, tmp_path):
  """
  Test that the --limit option caps the number of search results.

  The vault is indexed first; the search is then run with ``--limit 2``.
  """
  import re  # local import: only this test needs it

  chroma_path = tmp_path / "chroma_db"

  # Index
  index_result = runner.invoke(app, [
      "index",
      str(temp_vault),
      "--chroma-path", str(chroma_path),
  ])
  # Fail here, with a clear cause, rather than in the search step below.
  assert index_result.exit_code == 0

  # Search with limit
  result = runner.invoke(app, [
      "search",
      "programming",
      "--chroma-path", str(chroma_path),
      "--limit", "2",
  ])

  assert result.exit_code == 0

  # NOTE(review): this counts the literal markup tag. If the CLI renders Rich
  # markup before printing, the tag never reaches stdout and the count is
  # always 0 -- confirm against the CLI output.
  result_count = result.stdout.count("[bold cyan]")
  assert result_count <= 2

  # Additionally check the announced result count when the summary line is
  # present. Assumes a "Found <N> result(s) for:" header -- TODO confirm the
  # exact wording; the check is skipped if the pattern does not match.
  summary = re.search(r"Found\s+(\d+)\s+result", result.stdout)
  if summary is not None:
    assert int(summary.group(1)) <= 2


def test_i_can_search_with_min_score_option(temp_vault, tmp_path):
  """
  Test that the --min-score option works.

  The vault is indexed first; the search is then run with ``--min-score 0.5``
  and is expected to still report results.
  """
  chroma_path = tmp_path / "chroma_db"

  # Index
  index_result = runner.invoke(app, [
      "index",
      str(temp_vault),
      "--chroma-path", str(chroma_path),
  ])
  # Fail here, with a clear cause, rather than in the search step below.
  assert index_result.exit_code == 0

  # Search with high min-score
  result = runner.invoke(app, [
      "search",
      "Python",
      "--chroma-path", str(chroma_path),
      "--min-score", "0.5",
  ])

  assert result.exit_code == 0
  # Should have results (Python file should match well)
  assert "Found" in result.stdout


def test_i_can_search_with_custom_collection(temp_vault, tmp_path):
  """
  Test that we can search in a custom collection.

  The same collection name is used for the indexing and the search step.
  """
  chroma_path = tmp_path / "chroma_db"
  collection_name = "test_collection"

  # Index with custom collection
  index_result = runner.invoke(app, [
      "index",
      str(temp_vault),
      "--chroma-path", str(chroma_path),
      "--collection", collection_name,
  ])
  # Fail here, with a clear cause, rather than in the search step below.
  assert index_result.exit_code == 0

  # Search in same collection
  result = runner.invoke(app, [
      "search",
      "Python",
      "--chroma-path", str(chroma_path),
      "--collection", collection_name,
  ])

  assert result.exit_code == 0
  assert "Found" in result.stdout


def test_i_can_handle_no_results_gracefully(temp_vault, tmp_path):
  """
  Test that no results scenario is handled gracefully.

  An unrelated query combined with a very high ``--min-score`` is expected
  to yield an empty result set and a "No results found" message.
  """
  chroma_path = tmp_path / "chroma_db"

  # Index
  index_result = runner.invoke(app, [
      "index",
      str(temp_vault),
      "--chroma-path", str(chroma_path),
  ])
  # Fail here, with a clear cause, rather than in the search step below.
  assert index_result.exit_code == 0

  # Search for something unlikely with high threshold
  result = runner.invoke(app, [
      "search",
      "quantum physics relativity",
      "--chroma-path", str(chroma_path),
      "--min-score", "0.95",
  ])

  assert result.exit_code == 0
  assert "No results found" in result.stdout


def test_i_can_use_compact_format(temp_vault, tmp_path):
  """
  Test that compact format displays correctly.

  The vault is indexed first; the search is then run with an explicit
  ``--format compact`` and the output is checked for the compact labels.
  """
  chroma_path = tmp_path / "chroma_db"

  # Index
  index_result = runner.invoke(app, [
      "index",
      str(temp_vault),
      "--chroma-path", str(chroma_path),
  ])
  # Fail here, with a clear cause, rather than in the search step below.
  assert index_result.exit_code == 0

  # Search with explicit compact format
  result = runner.invoke(app, [
      "search",
      "Python",
      "--chroma-path", str(chroma_path),
      "--format", "compact",
  ])

  assert result.exit_code == 0
  # Check for compact format elements
  assert "Section:" in result.stdout
  assert "Lines:" in result.stdout
  assert "score:" in result.stdout


# Tests for 'search' command - Error tests


def test_i_cannot_search_without_index(tmp_path):
  """
  Search against a ChromaDB path that was never created and expect a clear failure.
  """
  missing_db = tmp_path / "nonexistent_chroma"
  cli_args = ["search", "test query", "--chroma-path", str(missing_db)]

  outcome = runner.invoke(app, cli_args)
  printed = outcome.stdout

  assert outcome.exit_code == 1
  assert "not found" in printed
  # Case-insensitive check that the message mentions "index".
  assert "index" in printed.lower()


def test_i_cannot_search_nonexistent_collection(temp_vault, tmp_path):
  """
  Test that searching in a nonexistent collection fails.

  The vault is indexed into the default collection first, so the database
  exists and only the collection name is wrong.
  """
  chroma_path = tmp_path / "chroma_db"

  # Index with default collection
  index_result = runner.invoke(app, [
      "index",
      str(temp_vault),
      "--chroma-path", str(chroma_path),
  ])
  # Without this check a failed indexing step could make the search fail
  # for a different reason and the test would still pass.
  assert index_result.exit_code == 0

  # Search in different collection
  result = runner.invoke(app, [
      "search",
      "Python",
      "--chroma-path", str(chroma_path),
      "--collection", "nonexistent_collection",
  ])

  assert result.exit_code == 1
  assert "not found" in result.stdout


def test_i_cannot_use_invalid_format(temp_vault, tmp_path):
  """
  Test that an invalid format is rejected.

  The vault is indexed first so that the only invalid input to the search
  command is the ``--format`` value.
  """
  chroma_path = tmp_path / "chroma_db"

  # Index
  index_result = runner.invoke(app, [
      "index",
      str(temp_vault),
      "--chroma-path", str(chroma_path),
  ])
  # Without this check a failed indexing step could make the search fail
  # for a different reason than the bad format.
  assert index_result.exit_code == 0

  # Search with invalid format
  result = runner.invoke(app, [
      "search",
      "Python",
      "--chroma-path", str(chroma_path),
      "--format", "invalid_format",
  ])

  assert result.exit_code == 1
  assert "Invalid format" in result.stdout
  # The message is expected to mention the supported "compact" format.
  assert "compact" in result.stdout


# Tests for helper functions


def test_i_can_display_index_results(capsys):
  """
  Render an error-free stats dict and check each value reaches stdout.
  """
  _display_index_results({
      "files_processed": 10,
      "chunks_created": 50,
      "collection_name": "test_collection",
      "errors": [],
  })

  printed = capsys.readouterr().out

  # Completion banner, both counters and the collection name.
  for fragment in ("Indexing completed", "10", "50", "test_collection"):
    assert fragment in printed


def test_i_can_display_index_results_with_errors(capsys):
  """
  Render a stats dict carrying two per-file errors and check they are reported.
  """
  failures = [
      {"file": "broken.md", "error": "Invalid encoding"},
      {"file": "corrupt.md", "error": "Parse error"},
  ]
  summary = {
      "files_processed": 8,
      "chunks_created": 40,
      "collection_name": "test_collection",
      "errors": failures,
  }

  _display_index_results(summary)
  printed = capsys.readouterr().out

  # Banner, skipped-file count, then the first error's file name and message.
  expected_fragments = (
      "Indexing completed",
      "2 file(s) skipped",
      "broken.md",
      "Invalid encoding",
  )
  for fragment in expected_fragments:
    assert fragment in printed


def test_i_can_display_results_compact(capsys):
  """
  Render two search hits in compact form and check their key fields are printed.
  """
  python_hit = SearchResult(
    file_path="notes/python.md",
    section_title="Introduction",
    line_start=1,
    line_end=5,
    score=0.87,
    text="Python is a high-level programming language.",
  )
  javascript_hit = SearchResult(
    file_path="notes/javascript.md",
    section_title="Overview",
    line_start=10,
    line_end=15,
    score=0.65,
    text="JavaScript is used for web development.",
  )

  _display_results_compact([python_hit, javascript_hit])
  printed = capsys.readouterr().out

  # File names first, then scores, then section titles.
  expected_fragments = (
      "python.md",
      "javascript.md",
      "0.87",
      "0.65",
      "Introduction",
      "Overview",
  )
  for fragment in expected_fragments:
    assert fragment in printed


def test_i_can_display_results_compact_with_long_text(capsys):
  """
  Test that long text is truncated in compact display.

  A single result whose text is 300 "A" characters is rendered; the output
  must contain an ellipsis and must not contain the whole text.
  """
  long_text = "A" * 300  # Text longer than 200 characters

  results = [
      SearchResult(
        file_path="notes/long.md",
        section_title="Long Section",
        line_start=1,
        line_end=10,
        score=0.75,
        text=long_text,
      ),
  ]

  _display_results_compact(results)

  captured = capsys.readouterr()
  assert "..." in captured.out  # Should be truncated

  # No single output line carries a 200-character run of the text.
  assert not any("A" * 200 in line for line in captured.out.split("\n"))

  # The per-line check alone passes trivially if the renderer wraps long
  # lines, so also compare against the whole output. The other fields of the
  # result contain no uppercase "A".
  assert captured.out.count("A") < len(long_text)