From d77c0ba5259ff7ef7ed6c88374df964c8d83aab1 Mon Sep 17 00:00:00 2001
From: Kodjo Sossouvi
Date: Sat, 11 Oct 2025 22:34:05 +0200
Subject: [PATCH] Added Ollama

---
 ollama/Readme.md          | 541 ++++++++++++++++++++++++++++++++++++++
 ollama/docker-compose.yml |  33 +++
 2 files changed, 574 insertions(+)
 create mode 100644 ollama/Readme.md
 create mode 100644 ollama/docker-compose.yml

diff --git a/ollama/Readme.md b/ollama/Readme.md
new file mode 100644
index 0000000..b4055ab
--- /dev/null
+++ b/ollama/Readme.md
@@ -0,0 +1,541 @@
# Ollama Docker Setup 🦙 (WSL2 + Windows 11)

Complete guide for running Ollama with Docker Compose and GPU acceleration on WSL2.

## 📋 Table of Contents

- [Prerequisites](#prerequisites)
- [WSL2 Setup](#wsl2-setup)
- [Installation](#installation)
- [Starting Ollama](#starting-ollama)
- [Model Management](#model-management)
- [Usage Examples](#usage-examples)
- [API Reference](#api-reference)
- [Troubleshooting](#troubleshooting)
- [Performance Tips](#performance-tips)

## 🔧 Prerequisites

### Required Software

- **Windows 11** with WSL2 enabled
- **Ubuntu 24.04** on WSL2
- **Docker Desktop for Windows** with WSL2 backend
- **NVIDIA GPU** with CUDA support (RTX series recommended)
- **NVIDIA Driver** for Windows (latest version)

### System Requirements

- Windows 11 Build 22000 or higher
- 16 GB RAM minimum (32 GB recommended for larger models)
- 50 GB+ free disk space for models
- NVIDIA GPU with 8 GB+ VRAM

## 🪟 WSL2 Setup

### 1. Enable WSL2 (if not already done)

```powershell
# Run in PowerShell as Administrator
wsl --install
wsl --set-default-version 2

# Install Ubuntu 24.04
wsl --install -d Ubuntu-24.04

# Verify WSL2 is active
wsl --list --verbose
```

### 2. Install Docker Desktop for Windows

1. Download from [Docker Desktop](https://www.docker.com/products/docker-desktop)
2. Install and enable the **WSL2 backend** in settings
3. Enable integration with the Ubuntu-24.04 distro in: Settings → Resources → WSL Integration

### 3. Verify GPU Support in WSL2

```bash
# Open a WSL2 Ubuntu terminal
wsl

# Check the NVIDIA driver
nvidia-smi

# You should see your GPU listed
```

**Important**: You do NOT need to install the NVIDIA Container Toolkit in WSL2. Docker Desktop handles GPU passthrough automatically.

### 4. Test Docker GPU Access

```bash
# In a WSL2 terminal
docker run --rm --gpus all nvidia/cuda:12.0.0-base-ubuntu22.04 nvidia-smi
```

If this works, you're ready to go! 🎉

## 🚀 Installation

### 1. Create Project Structure in WSL2

```bash
# Open a WSL2 terminal
wsl

# Create the project directory
mkdir -p ~/ollama-docker
cd ~/ollama-docker
```

### 2. Create `docker-compose.yml`

Use the provided `docker-compose.yml` file with the WSL2 path:

- Windows path: `E:\volumes\ollama\data`
- WSL2 path: `/mnt/e/volumes/ollama/data`
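
If you want to double-check how a Windows path maps to its WSL2 equivalent, `wslpath` (shipped with WSL) converts in both directions. The path below is the example path used throughout this guide; substitute your own drive and folder if it differs.

```bash
# Windows → WSL2
wslpath -u 'E:\volumes\ollama\data'     # prints /mnt/e/volumes/ollama/data

# WSL2 → Windows
wslpath -w /mnt/e/volumes/ollama/data   # prints E:\volumes\ollama\data
```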

### 3. Create Volume Directory

```bash
# From a WSL2 terminal
sudo mkdir -p /mnt/e/volumes/ollama/data

# Or from Windows PowerShell
mkdir E:\volumes\ollama\data
```

## ▶️ Starting Ollama

```bash
# Navigate to the project directory
cd ~/ollama-docker

# Start the service
docker compose up -d

# Check the logs
docker compose logs -f ollama

# Verify the service is running
curl http://localhost:11434
```

Expected response: `Ollama is running`

### Access from Windows

Ollama is accessible from both WSL2 and Windows:

- **WSL2**: `http://localhost:11434`
- **Windows**: `http://localhost:11434`

## 📦 Model Management

### List Available Models

```bash
# Inside the container
docker exec -it ollama ollama list

# Or from WSL2 (if the ollama CLI is installed locally)
ollama list
```

### Pull/Download Models

```bash
# Pull a model
docker exec -it ollama ollama pull llama3.2

# Popular models
docker exec -it ollama ollama pull mistral
docker exec -it ollama ollama pull codellama
docker exec -it ollama ollama pull phi3
docker exec -it ollama ollama pull llama3.1:70b
```

### Model Sizes Reference

| Model | Parameters | Size | RAM Required | VRAM Required |
|-------|-----------|------|--------------|---------------|
| `phi3` | 3.8B | ~2.3 GB | 8 GB | 4 GB |
| `llama3.2` | 3B | ~2.0 GB | 8 GB | 4 GB |
| `mistral` | 7B | ~4.1 GB | 8 GB | 6 GB |
| `llama3.1:70b` | 70B | ~40 GB | 64 GB | 48 GB |
| `codellama` | 7B | ~3.8 GB | 8 GB | 6 GB |

Note: the default `llama3.2` tag is the 3B model; the 8B and 70B variants are published under `llama3.1`.

### Remove/Unload Models

```bash
# Remove a model from disk
docker exec -it ollama ollama rm llama3.2

# Stop a running model (unload it from memory)
docker exec -it ollama ollama stop llama3.2

# Show running models
docker exec -it ollama ollama ps
```

### Copy Models Between Systems

```bash
# Export a model's Modelfile (parameters, template, system prompt)
docker exec ollama ollama show llama3.2 --modelfile > Modelfile

# Import it on another system
docker cp Modelfile ollama:/tmp/Modelfile
docker exec ollama ollama create my-model -f /tmp/Modelfile
```

Note: the exported `FROM` line references local blob files, so this transfers the model definition, not the weights. To move the weights, copy the `models` directory inside the volume (`E:\volumes\ollama\data\models`) to the other machine, or simply re-pull the base model there.

## 💡 Usage Examples

### Interactive Chat

```bash
# Start an interactive session
docker exec -it ollama ollama run llama3.2

# One-shot prompt with a specific model
docker exec -it ollama ollama run mistral "Explain quantum computing"
```

### Using the API

#### Generate Completion

```bash
curl http://localhost:11434/api/generate -d '{
  "model": "llama3.2",
  "prompt": "Why is the sky blue?",
  "stream": false
}'
```

#### Chat Completion

```bash
curl http://localhost:11434/api/chat -d '{
  "model": "llama3.2",
  "messages": [
    {
      "role": "user",
      "content": "Hello! Can you help me with Python?"
    }
  ],
  "stream": false
}'
```
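
Because `/api/chat` is stateless, the conversation history travels in the `messages` array you send with each request: append the model's previous reply as an `"assistant"` message, then add the new user turn. A minimal follow-up to the request above could look like this (the assistant content is illustrative):

```bash
curl http://localhost:11434/api/chat -d '{
  "model": "llama3.2",
  "messages": [
    { "role": "user", "content": "Hello! Can you help me with Python?" },
    { "role": "assistant", "content": "Of course! What would you like to know?" },
    { "role": "user", "content": "How do I read a JSON file?" }
  ],
  "stream": false
}'
```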

#### Streaming Response

```bash
curl http://localhost:11434/api/generate -d '{
  "model": "llama3.2",
  "prompt": "Write a haiku about programming",
  "stream": true
}'
```

### Python Example (from Windows or WSL2)

```python
import requests

def chat_with_ollama(prompt, model="llama3.2"):
    url = "http://localhost:11434/api/generate"
    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False
    }

    response = requests.post(url, json=payload)
    return response.json()["response"]

# Usage
result = chat_with_ollama("What is Docker?")
print(result)
```

### JavaScript Example (from Windows or WSL2)

```javascript
async function chatWithOllama(prompt, model = "llama3.2") {
  const response = await fetch("http://localhost:11434/api/generate", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      model: model,
      prompt: prompt,
      stream: false
    })
  });

  const data = await response.json();
  return data.response;
}

// Usage
chatWithOllama("Explain REST APIs").then(console.log);
```

## 🔌 API Reference

### Main Endpoints

| Endpoint | Method | Description |
|----------|--------|-------------|
| `/api/generate` | POST | Generate a text completion |
| `/api/chat` | POST | Chat completion with conversation history |
| `/api/tags` | GET | List available models |
| `/api/pull` | POST | Download a model |
| `/api/push` | POST | Upload a custom model |
| `/api/embeddings` | POST | Generate embeddings |

### Generate Parameters

```json
{
  "model": "llama3.2",
  "prompt": "Your prompt here",
  "stream": false,
  "options": {
    "temperature": 0.7,
    "top_p": 0.9,
    "top_k": 40,
    "num_predict": 128,
    "stop": ["\n"]
  }
}
```

## 🐛 Troubleshooting

### Container Won't Start

```bash
# Check the logs
docker compose logs ollama

# Common issues:
# 1. GPU not accessible
docker run --rm --gpus all nvidia/cuda:12.0.0-base-ubuntu22.04 nvidia-smi

# 2. Port already in use
netstat -ano | findstr :11434   # From Windows PowerShell
ss -tulpn | grep 11434          # From WSL2
```

### GPU Not Detected in WSL2

```powershell
# Update the NVIDIA driver (from Windows)
# Download the latest driver from: https://www.nvidia.com/Download/index.aspx

# Restart WSL2 (from PowerShell)
wsl --shutdown
wsl

# Verify the GPU
nvidia-smi
```

### Model Download Fails

```bash
# Check disk space inside the container
docker exec ollama df -h /root/.ollama

# Check WSL2 disk space
df -h /mnt/e

# Retry the pull while watching the server logs in another terminal
docker compose logs -f ollama
docker exec -it ollama ollama pull llama3.2
```

### Out of Memory Errors

```bash
# Check GPU memory
nvidia-smi

# Use a smaller model, or reduce the context window from an interactive session
docker exec -it ollama ollama run llama3.2
# then, inside the session:
/set parameter num_ctx 2048
```

### WSL2 Disk Space Issues

```powershell
# Compact the WSL2 virtual disk (from PowerShell as Admin; requires the Hyper-V PowerShell module)
wsl --shutdown
Optimize-VHD -Path "$env:LOCALAPPDATA\Packages\CanonicalGroupLimited.Ubuntu24.04LTS_*\LocalState\ext4.vhdx" -Mode Full
```

### Docker Desktop Integration Issues

1. Open Docker Desktop
2. Go to **Settings → Resources → WSL Integration**
3. Enable integration with **Ubuntu-24.04**
4. Click **Apply & Restart**

### Permission Denied on Volume

```bash
# From WSL2
sudo chmod -R 755 /mnt/e/volumes/ollama/data
```
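
### Quick Health Check

Most problems come down to one of three things: the container is not running, the API is not reachable, or no model has been pulled yet. This small script (an illustrative helper, not part of the repo) checks all three in order:

```bash
#!/usr/bin/env bash
# Quick Ollama sanity check: container state, API reachability, installed models.
set -u

# 1. Is the container running?
if [ "$(docker inspect --format '{{.State.Running}}' ollama 2>/dev/null)" != "true" ]; then
  echo "Container 'ollama' is not running - try: docker compose up -d"
  exit 1
fi

# 2. Does the API answer on the published port?
if ! curl -sf http://localhost:11434 >/dev/null; then
  echo "API not reachable on :11434 - check: docker compose logs ollama"
  exit 1
fi

# 3. Is at least one model installed?
if ! curl -sf http://localhost:11434/api/tags | grep -q '"name"'; then
  echo "No models installed yet - try: docker exec -it ollama ollama pull llama3.2"
  exit 1
fi

echo "Ollama looks healthy."
```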

## ⚡ Performance Tips

### 1. WSL2 Memory Configuration

Create/edit `.wslconfig` in your Windows user directory (`C:\Users\YourName\.wslconfig`):

```ini
[wsl2]
memory=16GB
processors=8
swap=8GB
```

Apply the changes:

```powershell
wsl --shutdown
wsl
```

### 2. GPU Memory Optimization

```yaml
# In docker-compose.yml: pin Ollama to a single GPU and
# control how long models stay loaded in VRAM
environment:
  - CUDA_VISIBLE_DEVICES=0
  - OLLAMA_KEEP_ALIVE=5m
```

### 3. Concurrent Requests

```yaml
# In docker-compose.yml
environment:
  - OLLAMA_MAX_LOADED_MODELS=3
  - OLLAMA_NUM_PARALLEL=4
```

### 4. Context Window

A smaller context window gives faster responses and uses less VRAM; a larger one allows longer conversations. In an interactive session, set it with the `/set` command:

```bash
docker exec -it ollama ollama run llama3.2
# then, inside the session:
/set parameter num_ctx 2048   # faster responses
/set parameter num_ctx 8192   # longer conversations
```
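
Over the HTTP API, the same setting is passed per request through the `num_ctx` field of the `options` object (the prompt below is just an example):

```bash
curl http://localhost:11434/api/generate -d '{
  "model": "llama3.2",
  "prompt": "Summarize the plot of Hamlet in two sentences.",
  "stream": false,
  "options": {
    "num_ctx": 2048
  }
}'
```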

### 5. Model Quantization

Use quantized models for better performance. Most library models publish several quantized tags (check the **Tags** tab on the model's page at ollama.com), for example:

```bash
# 4-bit quantization (faster, smaller, slightly less accurate)
docker exec -it ollama ollama pull llama3.2:3b-instruct-q4_K_M

# 8-bit quantization (balanced)
docker exec -it ollama ollama pull llama3.2:3b-instruct-q8_0
```

### 6. Store Models on SSD

For best performance, ensure `E:\volumes` is on an SSD, not an HDD.

## 📊 Monitoring

### Check Resource Usage

```bash
# Container stats
docker stats ollama

# GPU utilization (from WSL2 or Windows)
nvidia-smi

# Continuous monitoring
watch -n 1 nvidia-smi
```

### Model Status

```bash
# Show running models
docker exec ollama ollama ps

# Model information
docker exec ollama ollama show llama3.2
```

### WSL2 Resource Usage

```powershell
# From Windows PowerShell
wsl --list --verbose
```

## 🛑 Stopping and Cleanup

```bash
# Stop the service
docker compose down

# Stop and remove volumes
docker compose down -v

# Remove all models
docker exec ollama sh -c "rm -rf /root/.ollama/models/*"

# Shut down WSL2 (from Windows PowerShell)
wsl --shutdown
```

## 🔗 Useful Links

- [Ollama Official Documentation](https://github.com/ollama/ollama)
- [Ollama Model Library](https://ollama.com/library)
- [API Documentation](https://github.com/ollama/ollama/blob/main/docs/api.md)
- [WSL2 GPU Documentation](https://learn.microsoft.com/en-us/windows/wsl/tutorials/gpu-compute)
- [Docker Desktop WSL2 Backend](https://docs.docker.com/desktop/wsl/)

## 🎯 Quick Reference

### Common Commands

```bash
# Start Ollama
docker compose up -d

# Pull a model
docker exec -it ollama ollama pull llama3.2

# Run an interactive chat
docker exec -it ollama ollama run llama3.2

# List models
docker exec -it ollama ollama list

# Check the GPU
nvidia-smi

# Stop Ollama
docker compose down
```

## 📝 Notes for WSL2 Users

- **Path Conversion**: Windows `E:\folder` = WSL2 `/mnt/e/folder`
- **Performance**: models stored on Windows drives are accessible but slightly slower
- **GPU Passthrough**: handled automatically by Docker Desktop
- **Networking**: `localhost` works from both Windows and WSL2
- **Memory**: configure WSL2 memory in `.wslconfig` for large models

---

**Need help?** Open an issue or check the [Ollama Discord](https://discord.gg/ollama)
\ No newline at end of file
diff --git a/ollama/docker-compose.yml b/ollama/docker-compose.yml
new file mode 100644
index 0000000..ada2d31
--- /dev/null
+++ b/ollama/docker-compose.yml
@@ -0,0 +1,33 @@
services:
  ollama:
    image: ollama/ollama:latest
    container_name: ollama
    restart: unless-stopped
    ports:
      - "11434:11434"
    volumes:
      - /mnt/e/volumes/ollama/data:/root/.ollama
    environment:
      - OLLAMA_HOST=0.0.0.0:11434
      # Optional: Set GPU device if you have multiple GPUs
      # - NVIDIA_VISIBLE_DEVICES=0
    command: serve
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    networks:
      - app-network
    healthcheck:
      test: ["CMD", "ollama", "list"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s

networks:
  app-network:
    driver: bridge
\ No newline at end of file
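
As a quick way to exercise this compose file after applying the patch, the commands below validate the configuration, start the stack, and read back the health status defined by the healthcheck above (the container and service names match this file; adjust them if you rename anything):

```bash
# Validate the compose file without starting anything
docker compose config --quiet

# Start the stack and check the healthcheck result
docker compose up -d
docker inspect --format '{{.State.Health.Status}}' ollama   # expect "healthy"

# Confirm the API answers on the published port
curl http://localhost:11434
```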