# Copyright 1999-2025 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2

# Ollama configuration file for OpenRC.
# This file is sourced (as shell) by the ollama init script; every
# setting below is exported into the environment of 'ollama serve'.
# Uncomment a variable to override the daemon's built-in default.

# Bind address for the Ollama API server.
# Default: 0.0.0.0:11434 (listens on all interfaces)
# For localhost only: 127.0.0.1:11434
OLLAMA_HOST="0.0.0.0:11434"

# Directory where models are stored.
# Default: /var/lib/ollama
OLLAMA_MODELS="/var/lib/ollama"

# How long to keep models loaded in memory after use.
# Examples: "5m" (5 minutes), "1h" (1 hour), "0" (unload immediately)
# Default: 5m
#OLLAMA_KEEP_ALIVE="5m"

# Number of parallel requests to process.
# Default: 1 (sequential processing)
# Increase for better throughput with multiple concurrent users.
#OLLAMA_NUM_PARALLEL="1"

# Maximum number of simultaneously loaded models.
# Default: 1
# Increase if you need multiple models resident at the same time.
#OLLAMA_MAX_LOADED_MODELS="1"

# Context window size.
# Larger values use more memory but allow longer conversations.
# Default: 2048
# NOTE(review): confirm the daemon honors OLLAMA_NUM_CTX — recent upstream
# documentation lists the environment variable as OLLAMA_CONTEXT_LENGTH.
#OLLAMA_NUM_CTX="2048"

# Enable debug logging.
# Set to 1 to enable verbose debug output.
#OLLAMA_DEBUG="0"

# GPU configuration (NVIDIA CUDA).
# Comma-separated list of GPU device IDs to use.
# Example: "0" (first GPU), "0,1" (first two GPUs)
#CUDA_VISIBLE_DEVICES="0"

# GPU configuration (AMD ROCm).
# Override the detected GPU (gfx) version for compatibility.
# Example for RX 6000 series: "10.3.0"
# Example for Radeon VII: "9.0.6"
#HSA_OVERRIDE_GFX_VERSION=""

# Memory limit (optional).
# Maximum VRAM Ollama may use for models, e.g. "8G" for 8 gigabytes.
# NOTE(review): verify OLLAMA_MAX_VRAM is still recognized by the installed
# ollama version — it does not appear in current upstream env-var docs.
#OLLAMA_MAX_VRAM=""

# Additional command-line arguments.
# Extra flags appended by the init script to 'ollama serve' — verify the
# init script actually consumes this variable before relying on it.
#OLLAMA_EXTRA_ARGS=""