239 lines
6.0 KiB
Python
239 lines
6.0 KiB
Python
"""Unit tests for markdown_parser module."""
|
|
|
|
import pytest
|
|
from pathlib import Path
|
|
from markdown_parser import (
|
|
parse_markdown_file,
|
|
find_section_at_line,
|
|
MarkdownSection,
|
|
ParsedDocument
|
|
)
|
|
|
|
|
|
@pytest.fixture
def tmp_markdown_file(tmp_path):
    """Provide a factory that writes markdown files into a temp directory.

    Args:
        tmp_path: temporary directory object; files are created beneath it.

    Returns:
        A callable ``(content, filename="test.md") -> Path`` that writes
        ``content`` as UTF-8 under ``tmp_path`` and returns the file's path.
    """

    def _write_markdown(content: str, filename: str = "test.md") -> Path:
        target = tmp_path / filename
        target.write_text(content, encoding="utf-8")
        return target

    return _write_markdown
|
|
|
|
|
|
# Tests for parse_markdown_file()
|
|
|
|
def test_i_can_parse_file_with_single_section(tmp_markdown_file):
    """A file with one header parses into exactly one section."""
    markdown_text = """# Main Title
This is the content of the section.
It has multiple lines."""

    doc = parse_markdown_file(tmp_markdown_file(markdown_text))

    assert len(doc.sections) == 1

    only_section = doc.sections[0]
    assert (only_section.level, only_section.title) == (1, "Main Title")
    assert "This is the content" in only_section.content
    # Header on line 1, body through line 3 (1-based, inclusive).
    assert (only_section.start_line, only_section.end_line) == (1, 3)
|
|
|
|
|
|
def test_i_can_parse_file_with_multiple_sections(tmp_markdown_file):
    """Several same-level headers parse into separate sections, in order."""
    markdown_text = """# Section One
Content of section one.

# Section Two
Content of section two.

# Section Three
Content of section three."""

    doc = parse_markdown_file(tmp_markdown_file(markdown_text))

    assert len(doc.sections) == 3

    titles = [section.title for section in doc.sections]
    assert titles == ["Section One", "Section Two", "Section Three"]

    # Every header in the sample uses a single '#'.
    levels = [section.level for section in doc.sections]
    assert levels == [1, 1, 1]
|
|
|
|
|
|
def test_i_can_parse_file_with_nested_sections(tmp_markdown_file):
    """Headers of different depths each become a section with its own level."""
    markdown_text = """# Main Title
Introduction text.

## Subsection A
Content A.

## Subsection B
Content B.

### Sub-subsection
Nested content."""

    doc = parse_markdown_file(tmp_markdown_file(markdown_text))

    assert len(doc.sections) == 4

    # Sections are expected in document order as (level, title) pairs.
    outline = [(section.level, section.title) for section in doc.sections]
    assert outline == [
        (1, "Main Title"),
        (2, "Subsection A"),
        (2, "Subsection B"),
        (3, "Sub-subsection"),
    ]
|
|
|
|
|
|
def test_i_can_parse_file_without_headers(tmp_markdown_file):
    """Header-less text parses into one untitled level-0 section."""
    plain_text = """This is a plain text file.
It has no headers at all.
Just regular content."""

    doc = parse_markdown_file(tmp_markdown_file(plain_text))

    assert len(doc.sections) == 1

    body = doc.sections[0]
    assert (body.level, body.title) == (0, "")
    # The whole file text is expected back unchanged as the section content.
    assert body.content == plain_text
    assert (body.start_line, body.end_line) == (1, 3)
|
|
|
|
|
|
def test_i_can_parse_empty_file(tmp_markdown_file):
    """An empty file still parses into one empty, untitled section."""
    doc = parse_markdown_file(tmp_markdown_file(""))

    assert len(doc.sections) == 1

    placeholder = doc.sections[0]
    assert (placeholder.level, placeholder.title) == (0, "")
    assert placeholder.content == ""
    # The empty section is expected to start and end on line 1.
    assert (placeholder.start_line, placeholder.end_line) == (1, 1)
|
|
|
|
|
|
def test_i_can_track_correct_line_numbers(tmp_markdown_file):
    """Each section reports the 1-based start and end lines it covers."""
    markdown_text = """# First Section
Line 2
Line 3

# Second Section
Line 6
Line 7
Line 8"""

    doc = parse_markdown_file(tmp_markdown_file(markdown_text))

    first_section = doc.sections[0]
    # The blank line 4 is expected to belong to the first section.
    assert (first_section.start_line, first_section.end_line) == (1, 4)

    second_section = doc.sections[1]
    assert (second_section.start_line, second_section.end_line) == (5, 8)
|
|
|
|
|
|
def test_i_cannot_parse_nonexistent_file():
    """Parsing a path that does not exist must raise FileNotFoundError."""
    missing_path = Path("/nonexistent/path/to/file.md")

    with pytest.raises(FileNotFoundError):
        parse_markdown_file(missing_path)
|
|
|
|
|
|
# Tests for find_section_at_line()
|
|
|
|
def test_i_can_find_section_at_specific_line(tmp_markdown_file):
    """A line inside a section's body resolves to that section."""
    markdown_text = """# Section One
Line 2
Line 3

# Section Two
Line 6
Line 7"""

    doc = parse_markdown_file(tmp_markdown_file(markdown_text))

    # (line queried, title of the section expected to contain it)
    expectations = ((3, "Section One"), (6, "Section Two"))
    for line_number, expected_title in expectations:
        found = find_section_at_line(doc, line_number)

        assert found is not None
        assert found.title == expected_title
|
|
|
|
|
|
def test_i_can_find_section_at_first_line(tmp_markdown_file):
    """Querying the header line itself resolves to the section it opens."""
    markdown_text = """# Main Title
Content here."""

    doc = parse_markdown_file(tmp_markdown_file(markdown_text))

    found = find_section_at_line(doc, 1)

    assert found is not None
    assert found.title == "Main Title"
|
|
|
|
|
|
def test_i_can_find_section_at_last_line(tmp_markdown_file):
    """Test finding a section at its last line.

    The original probes (lines 3 and 6) are kept. In addition, every
    section is looked up at the ``end_line`` the parser itself reports, so
    the true closing boundary is exercised even when a section ends on a
    trailing blank line rather than on its last line of text.
    """
    content = """# Section One
Line 2
Line 3

# Section Two
Line 6"""

    file_path = tmp_markdown_file(content)
    doc = parse_markdown_file(file_path)

    # Last text line of the first section.
    section = find_section_at_line(doc, 3)

    assert section is not None
    assert section.title == "Section One"

    # Last line of the file, which is also the last line of the second section.
    section = find_section_at_line(doc, 6)

    assert section is not None
    assert section.title == "Section Two"

    # Boundary check: the reported end_line of a section must resolve back
    # to that same section.
    for expected in doc.sections:
        found = find_section_at_line(doc, expected.end_line)

        assert found is not None
        assert found.title == expected.title
|
|
|
|
|
|
def test_i_cannot_find_section_for_invalid_line_number(tmp_markdown_file):
    """Line numbers outside the document resolve to no section (None)."""
    markdown_text = """# Title
Content"""

    doc = parse_markdown_file(tmp_markdown_file(markdown_text))

    # In order: a negative number, zero, and a number far past the
    # two-line file.
    for invalid_line in (-1, 0, 1000):
        assert find_section_at_line(doc, invalid_line) is None
|