# llama-server.service -- systemd unit for the llama.cpp HTTP inference server.
#
# Runtime settings (model path, listen address, thread count, ...) are read
# from /etc/llama-cpp/llama-server.conf and substituted into ExecStart= below.

[Unit]
Description=llama.cpp inference server (OpenAI-compatible HTTP API)
Documentation=https://github.com/ggml-org/llama.cpp
# Wants= pulls in network-online.target; After= orders this unit behind it.
After=network-online.target
Wants=network-online.target

[Service]
Type=exec
User=llama-cpp
Group=llama-cpp

# Expected to define LLAMA_MODEL, LLAMA_HOST, LLAMA_PORT, LLAMA_THREADS and
# LLAMA_CTX. LLAMA_EXTRA_ARGS is optional. There is no "-" prefix on the path,
# so a missing file makes the unit fail to start.
EnvironmentFile=/etc/llama-cpp/llama-server.conf

# NOTE: The binary is typically at /usr/bin/llama-server. If it was installed
# into /usr/lib64/llama.cpp/ instead, adjust this path after the first emerge.
#
# ${VAR} always expands to exactly one argument. The unbraced $LLAMA_EXTRA_ARGS
# is split on whitespace into zero or more arguments, so it may be left empty
# or carry several extra flags.
ExecStart=/usr/bin/llama-server \
    --model ${LLAMA_MODEL} \
    --host ${LLAMA_HOST} \
    --port ${LLAMA_PORT} \
    --threads ${LLAMA_THREADS} \
    --ctx-size ${LLAMA_CTX} \
    $LLAMA_EXTRA_ARGS
Restart=on-failure
RestartSec=3

# Models live under /var/lib/llama-cpp/models (StateDirectory)
StateDirectory=llama-cpp
WorkingDirectory=/var/lib/llama-cpp

# --- Hardening ---
NoNewPrivileges=true
ProtectSystem=strict
ProtectHome=true
PrivateTmp=true
ProtectKernelTunables=true
ProtectKernelModules=true
RestrictAddressFamilies=AF_INET AF_INET6 AF_UNIX
SystemCallFilter=@system-service

# NOTE: systemd cannot expand environment variables in ReadWritePaths,
# ReadOnlyPaths, or StateDirectory directives, so these paths cannot follow
# LLAMA_MODEL automatically. If your models live OUTSIDE /var/lib/llama-cpp
# (e.g. under /home or another mount), you must create a drop-in override
# granting the appropriate ReadWritePaths= or ReadOnlyPaths= to that location.
# For paths under /home, ProtectHome=true also needs to be overridden, because
# it makes /home inaccessible to the service. Example drop-in:
#
#   /etc/systemd/system/llama-server.service.d/models-path.conf
#   [Service]
#   ProtectHome=false
#   ReadOnlyPaths=/home/models

[Install]
WantedBy=multi-user.target