[Unit]
Description=llama.cpp inference server (OpenAI-compatible HTTP API)
Documentation=https://github.com/ggml-org/llama.cpp
# Wants= pulls network-online.target into the boot transaction; After= delays
# our start until that target has been reached.  Both are needed: Wants= alone
# gives no ordering, After= alone does not activate the target.
Wants=network-online.target
After=network-online.target

[Service]
# --- Identity ---
# The server runs under a dedicated unprivileged account.
Group=llama-cpp
User=llama-cpp

# --- Filesystem layout ---
# StateDirectory=llama-cpp makes systemd provide /var/lib/llama-cpp for the
# service account; it stays writable despite ProtectSystem=strict below.
# Models are expected in its "models" subdirectory.
WorkingDirectory=/var/lib/llama-cpp
StateDirectory=llama-cpp

# --- Command ---
# The environment file supplies the LLAMA_* variables used on the command
# line.  There is no "-" prefix on the path, so a missing file is an error.
EnvironmentFile=/etc/llama-cpp/llama-server.conf

# Type=exec: the unit counts as started once the binary has been executed, so
# a wrong ExecStart= path shows up as a start failure.
Type=exec

# NOTE: /usr/bin/llama-server is the usual install location.  If the first
# emerge placed the binary in /usr/lib64/llama.cpp/ instead, adjust the path.
#
# ${VAR} expands to exactly one argument.  $LLAMA_EXTRA_ARGS is deliberately
# left unbraced so its value is split on whitespace into separate arguments
# (and disappears entirely when it is empty).
ExecStart=/usr/bin/llama-server --model ${LLAMA_MODEL} \
    --host ${LLAMA_HOST} --port ${LLAMA_PORT} \
    --threads ${LLAMA_THREADS} --ctx-size ${LLAMA_CTX} \
    $LLAMA_EXTRA_ARGS

# --- Restart policy ---
# Restart only after an unclean exit, waiting 3 seconds between attempts.
RestartSec=3
Restart=on-failure

# --- Hardening ---
# Privileges and kernel interfaces
NoNewPrivileges=true
ProtectKernelModules=true
ProtectKernelTunables=true
SystemCallFilter=@system-service
# Filesystem view
ProtectSystem=strict
ProtectHome=true
PrivateTmp=true
# Sockets: IPv4, IPv6 and local UNIX sockets only
RestrictAddressFamilies=AF_INET AF_INET6 AF_UNIX

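# NOTE: systemd does not expand environment variables in ReadWritePaths=,
# ReadOnlyPaths=, or StateDirectory=, so the model path from the environment
# file cannot be reused in those directives.  If your models live OUTSIDE
# /var/lib/llama-cpp, you may need a drop-in override for that location:
#
#   - Under /home, /root or /run/user: ProtectHome=true makes these trees
#     inaccessible, so it must be relaxed in addition to any ReadOnlyPaths=
#     or ReadWritePaths= entry (see the example below).
#   - On another mount: ProtectSystem=strict already leaves it readable but
#     read-only; add ReadWritePaths= only if the service must write there.
#
# In every case the llama-cpp user still needs ordinary file permissions on
# the model files.  Example drop-in:
#
#   /etc/systemd/system/llama-server.service.d/models-path.conf
#   [Service]
#   ProtectHome=false
#   ReadOnlyPaths=/home/models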

[Install]
# Used only by "systemctl enable/disable": enabling the unit makes
# multi-user.target want it, so the server is started during a normal boot.
WantedBy=multi-user.target