Files
MyObsidianAI/tests/test_markdown_parser.py
Kodjo Sossouvi d4925f7969 Initial commit
2025-12-12 11:31:44 +01:00

239 lines
6.0 KiB
Python

"""Unit tests for markdown_parser module."""
import pytest
from pathlib import Path
from markdown_parser import (
parse_markdown_file,
find_section_at_line,
MarkdownSection,
ParsedDocument
)
@pytest.fixture
def tmp_markdown_file(tmp_path):
    """Provide a factory that writes markdown files into a temp directory.

    Args:
        tmp_path: pytest's per-test temporary directory.

    Returns:
        A callable ``(content, filename="test.md") -> Path`` that writes
        ``content`` as UTF-8 under ``tmp_path`` and returns the file's path.
    """

    def _write_markdown(content: str, filename: str = "test.md") -> Path:
        # Each call writes (or overwrites) one file inside the test's tmp dir.
        target = tmp_path / filename
        target.write_text(content, encoding="utf-8")
        return target

    return _write_markdown
# Tests for parse_markdown_file()
def test_i_can_parse_file_with_single_section(tmp_markdown_file):
    """A file holding one header yields exactly one level-1 section."""
    markdown = (
        "# Main Title\n"
        "This is the content of the section.\n"
        "It has multiple lines."
    )
    doc = parse_markdown_file(tmp_markdown_file(markdown))

    assert len(doc.sections) == 1
    only_section = doc.sections[0]
    assert only_section.level == 1
    assert only_section.title == "Main Title"
    assert "This is the content" in only_section.content
    # The section spans the whole three-line file.
    assert only_section.start_line == 1
    assert only_section.end_line == 3
def test_i_can_parse_file_with_multiple_sections(tmp_markdown_file):
    """Three sibling level-1 headers produce three sections, in file order."""
    markdown = (
        "# Section One\n"
        "Content of section one.\n"
        "# Section Two\n"
        "Content of section two.\n"
        "# Section Three\n"
        "Content of section three."
    )
    doc = parse_markdown_file(tmp_markdown_file(markdown))

    assert len(doc.sections) == 3
    first, second, third = doc.sections[0], doc.sections[1], doc.sections[2]
    assert first.title == "Section One"
    assert second.title == "Section Two"
    assert third.title == "Section Three"
    # Every header in the fixture is a top-level one.
    for section in doc.sections:
        assert section.level == 1
def test_i_can_parse_file_with_nested_sections(tmp_markdown_file):
    """Headers of different depths are returned flat, each with its level."""
    markdown = (
        "# Main Title\n"
        "Introduction text.\n"
        "## Subsection A\n"
        "Content A.\n"
        "## Subsection B\n"
        "Content B.\n"
        "### Sub-subsection\n"
        "Nested content."
    )
    doc = parse_markdown_file(tmp_markdown_file(markdown))

    assert len(doc.sections) == 4
    # (index, expected level, expected title), checked in file order.
    expectations = (
        (0, 1, "Main Title"),
        (1, 2, "Subsection A"),
        (2, 2, "Subsection B"),
        (3, 3, "Sub-subsection"),
    )
    for index, expected_level, expected_title in expectations:
        assert doc.sections[index].level == expected_level
        assert doc.sections[index].title == expected_title
def test_i_can_parse_file_without_headers(tmp_markdown_file):
    """Header-less text becomes a single untitled level-0 section."""
    plain_text = (
        "This is a plain text file.\n"
        "It has no headers at all.\n"
        "Just regular content."
    )
    doc = parse_markdown_file(tmp_markdown_file(plain_text))

    assert len(doc.sections) == 1
    fallback = doc.sections[0]
    assert fallback.level == 0
    assert fallback.title == ""
    # The whole file is kept verbatim as the section content.
    assert fallback.content == plain_text
    assert fallback.start_line == 1
    assert fallback.end_line == 3
def test_i_can_parse_empty_file(tmp_markdown_file):
    """An empty file still yields one empty, untitled level-0 section."""
    empty_file = tmp_markdown_file("")
    doc = parse_markdown_file(empty_file)

    assert len(doc.sections) == 1
    placeholder = doc.sections[0]
    assert placeholder.level == 0
    assert placeholder.title == ""
    assert placeholder.content == ""
    # Start and end both sit on line 1 even though the file has no text.
    assert placeholder.start_line == 1
    assert placeholder.end_line == 1
def test_i_can_track_correct_line_numbers(tmp_markdown_file):
    """Test that start/end line numbers are tracked for each section.

    The fixture is spelled with explicit ``\\n`` escapes so the blank
    separator on line 4 cannot be lost by editors or formatters; without it
    the second header would sit on line 4 and the assertions below (and the
    "Line N" labels) would no longer match the file.

    Args:
        tmp_markdown_file: factory fixture that writes the given content to
            a temporary markdown file and returns its path.
    """
    content = (
        "# First Section\n"   # line 1
        "Line 2\n"
        "Line 3\n"
        "\n"                  # line 4: blank separator before the next header
        "# Second Section\n"  # line 5
        "Line 6\n"
        "Line 7\n"
        "Line 8"
    )
    file_path = tmp_markdown_file(content)
    doc = parse_markdown_file(file_path)

    # The first section is expected to run up to the blank line (line 4).
    assert doc.sections[0].start_line == 1
    assert doc.sections[0].end_line == 4
    # The second section starts on its header and ends on the last file line.
    assert doc.sections[1].start_line == 5
    assert doc.sections[1].end_line == 8
def test_i_cannot_parse_nonexistent_file():
    """Parsing a path that does not exist must raise FileNotFoundError."""
    missing_file = Path("/nonexistent/path/to/file.md")

    with pytest.raises(FileNotFoundError):
        parse_markdown_file(missing_file)
# Tests for find_section_at_line()
def test_i_can_find_section_at_specific_line(tmp_markdown_file):
    """Test finding a section at a line in the middle of content.

    The fixture uses explicit ``\\n`` escapes and keeps a blank separator on
    line 4 so that every "Line N" label really sits on line N; the lookups
    below therefore hit the lines their labels name.

    Args:
        tmp_markdown_file: factory fixture that writes the given content to
            a temporary markdown file and returns its path.
    """
    content = (
        "# Section One\n"  # line 1
        "Line 2\n"
        "Line 3\n"
        "\n"               # line 4: blank separator before the next header
        "# Section Two\n"  # line 5
        "Line 6\n"
        "Line 7"
    )
    file_path = tmp_markdown_file(content)
    doc = parse_markdown_file(file_path)

    # Line 3 is body text under the first header.
    section = find_section_at_line(doc, 3)
    assert section is not None
    assert section.title == "Section One"

    # Line 6 is body text under the second header.
    section = find_section_at_line(doc, 6)
    assert section is not None
    assert section.title == "Section Two"
def test_i_can_find_section_at_first_line(tmp_markdown_file):
    """Looking up the header line itself returns the section it opens."""
    doc = parse_markdown_file(tmp_markdown_file("# Main Title\nContent here."))

    # Line 1 is the "# Main Title" header.
    found = find_section_at_line(doc, 1)

    assert found is not None
    assert found.title == "Main Title"
def test_i_can_find_section_at_last_line(tmp_markdown_file):
    """Test finding a section via a line at the end of its content.

    The fixture uses explicit ``\\n`` escapes and keeps a blank separator on
    line 4. Without that line the file would be only five lines long and
    the lookup at line 6 below would point past the end of the file.

    Args:
        tmp_markdown_file: factory fixture that writes the given content to
            a temporary markdown file and returns its path.
    """
    content = (
        "# Section One\n"  # line 1
        "Line 2\n"
        "Line 3\n"
        "\n"               # line 4: blank separator before the next header
        "# Section Two\n"  # line 5
        "Line 6"           # line 6: last line of the file
    )
    file_path = tmp_markdown_file(content)
    doc = parse_markdown_file(file_path)

    # Last text line under the first header.
    section = find_section_at_line(doc, 3)
    assert section is not None
    assert section.title == "Section One"

    # Last line of the file, under the second header.
    section = find_section_at_line(doc, 6)
    assert section is not None
    assert section.title == "Section Two"
def test_i_cannot_find_section_for_invalid_line_number(tmp_markdown_file):
    """Out-of-range line numbers must produce ``None`` rather than a section."""
    doc = parse_markdown_file(tmp_markdown_file("# Title\nContent"))

    # In order: a negative number, zero, and a line far beyond the file end.
    for invalid_line in (-1, 0, 1000):
        assert find_section_at_line(doc, invalid_line) is None