# Copyright 1999-2026 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2

EAPI=8

# supports ROCM/HIP >=5.5, but we define 7.0 to match the rest of the overlay
ROCM_VERSION="7.0"

inherit cuda rocm cmake flag-o-matic go-module linux-info systemd

# Upstream's CMake superbuild (cmake/local.cmake) builds the GGML/llama.cpp
# inference backends by fetching a pinned llama.cpp via ExternalProject /
# FetchContent. The pinned commit lives in the LLAMA_CPP_VERSION file in the
# ollama source tree; we prestage that exact tree as a distfile and point the
# build at it through FETCHCONTENT_SOURCE_DIR_LLAMA_CPP so the build never
# touches the network. Re-check on every bump: `cat LLAMA_CPP_VERSION` in the
# matching ollama tag. verified 2026-06-18 (ollama 0.30.10 -> llama.cpp b9672).
LLAMACPP_COMMIT="b9672"

DESCRIPTION="Get up and running with Llama 3, Mistral, Gemma, and other language models"
HOMEPAGE="https://ollama.com"

MY_PV="${PV/_rc/-rc}"
MY_P="${PN}-${MY_PV}"

SRC_URI="
	https://github.com/ollama/${PN}/archive/refs/tags/v${MY_PV}.tar.gz -> ${MY_P}.gh.tar.gz
	https://github.com/gentoo-golang-dist/${PN}/releases/download/v${MY_PV}/${MY_P}-deps.tar.xz
	https://github.com/ggml-org/llama.cpp/archive/refs/tags/${LLAMACPP_COMMIT}.tar.gz
		-> ${PN}-llama.cpp-${LLAMACPP_COMMIT}.tar.gz
"

S="${WORKDIR}/${PN}-${MY_PV}"
LLAMACPP_S="${WORKDIR}/llama.cpp-${LLAMACPP_COMMIT}"

LICENSE="MIT"
SLOT="0"
KEYWORDS="~amd64"

# cuda  -> cuda_v13 llama-server backend (this overlay tracks CUDA 13.x;
#          cuda_v12 is intentionally not wired).
# rocm  -> rocm_v7_2 (Linux) llama-server backend.
# vulkan-> vulkan llama-server backend.
# The CPU backend (all microarch variants) is always built and the right one
# is dlopen'd at runtime, so no cpu_flags_x86 USE flags are needed.
IUSE="cuda rocm vulkan"

# Upstream tests pull models from the network; the dependency tarball is a
# GitHub release asset that must not be mirrored.
RESTRICT="mirror test"

CDEPEND="
	cuda? ( dev-util/nvidia-cuda-toolkit:= )
	rocm? (
		>=dev-util/hip-${ROCM_VERSION}:=
		>=sci-libs/hipBLAS-${ROCM_VERSION}:=
		>=sci-libs/rocBLAS-${ROCM_VERSION}:=
	)
"
DEPEND="${CDEPEND}"
BDEPEND="
	>=dev-lang/go-1.26.0
	vulkan? (
		dev-util/vulkan-headers
		media-libs/shaderc
	)
"
RDEPEND="
	${CDEPEND}
	acct-group/${PN}
	>=acct-user/${PN}-3[cuda?]
	vulkan? ( media-libs/vulkan-loader )
"

PATCHES=(
	"${FILESDIR}/${P}-unbundle-gpu-runtime-libs.patch"
	"${FILESDIR}/${P}-rocm-no-parallel-jobs.patch"
)

# With USE=rocm, warn (never fail) when the inspected kernel configuration
# lacks HSA_AMD_SVM.
pkg_setup() {
	if use rocm; then
		linux-info_pkg_setup
		if linux-info_get_any_version && linux_config_exists; then
			if ! linux_chkconfig_present HSA_AMD_SVM; then
				ewarn "To use ROCm/HIP, you need to have HSA_AMD_SVM option enabled in your kernel."
			fi
		fi
	fi
}

# Sanitize compiler flags for ROCm builds, then unpack all distfiles.
src_unpack() {
	if use rocm; then
		# ROCm/HIP rejects some LTO flags; filter before the Go env captures
		# them into CGO_*. 963401
		strip-unsupported-flags
		export CXXFLAGS="$(test-flags-HIPCXX "${CXXFLAGS}")"
	fi

	# Unpacks the ollama source, the prestaged llama.cpp tree, and the Go
	# dependency tarball (the latter into GOMODCACHE).
	go-module_src_unpack
}

# Apply PATCHES, then the llama/compat patch set onto the prestaged llama.cpp
# tree, and finally point the Go binary at the real libdir.
src_prepare() {
	cmake_src_prepare

	# The compat layer (llama/compat) links Ollama-owned sources into the
	# llama.cpp targets AND patches call-sites into llama.cpp itself. Upstream
	# applies that patch set only when it fetches llama.cpp; because we prestage
	# the source and pass FETCHCONTENT_SOURCE_DIR_LLAMA_CPP (which forces
	# OLLAMA_LLAMA_CPP_SKIP_COMPAT_PATCH=ON in the sub-build), we must apply it
	# ourselves. Mirror apply-patch.cmake: every llama/compat/**/*.patch, in
	# basename-numeric order (001-hooks, 002-ui, models/003-laguna, ...). The
	# models/*.patch add not-yet-upstreamed architectures whose .cpp are linked
	# into the llama target, so the whole set is required — a non-recursive glob
	# silently drops them and the linked sources then fail to compile.
	#
	# Sorting on "basename<TAB>path" orders by basename regardless of the
	# subdirectory; cut then strips the sort key again.
	local -a compat_patches
	mapfile -t compat_patches < <(
		find "${S}"/llama/compat -name '*.patch' -printf '%f\t%p\n' \
			| sort | cut -f2-
	)

	# A failing find inside the process substitution is invisible to the
	# caller, so a missing or renamed compat directory would otherwise apply
	# nothing and only surface much later as a compile error. Fail here.
	(( ${#compat_patches[@]} )) \
		|| die "no llama/compat patches found under ${S}/llama/compat"

	local compat_patch
	pushd "${LLAMACPP_S}" >/dev/null || die
	for compat_patch in "${compat_patches[@]}"; do
		eapply "${compat_patch}"
	done
	popd >/dev/null || die

	# The Go binary resolves its runtime payload at exeDir/../lib/ollama
	# (ml/path.go). We install to $(get_libdir)/ollama (lib64 on multilib), so
	# teach the binary to look there. No-op on lib (32-bit) layouts.
	sed -i -e "s/\"lib\", \"ollama\"/\"$(get_libdir)\", \"ollama\"/g" \
		ml/path.go || die "libdir sed failed"
}

# Translate USE flags into the superbuild's backend list and configure.
src_configure() {
	local backends=()
	use cuda && backends+=( cuda_v13 )
	use rocm && backends+=( rocm_v7_2 )
	use vulkan && backends+=( vulkan )

	local mycmakeargs=(
		-DOLLAMA_VERSION="${PV}"
		-DOLLAMA_LIB_DIR="$(get_libdir)/ollama"
		# Prestaged llama.cpp source; no network FetchContent.
		-DFETCHCONTENT_SOURCE_DIR_LLAMA_CPP="${LLAMACPP_S}"
		-DGGML_CCACHE=OFF
		# CMake list syntax: backends joined with ';' (empty when no GPU
		# USE flag is set).
		-DOLLAMA_LLAMA_BACKENDS="$(IFS=';'; echo "${backends[*]}")"
	)

	if use rocm; then
		# Forward the configured GPU arch(s); the superbuild then selects the
		# rocm_v7_2_user_arch preset and passes AMDGPU_TARGETS down to ggml-hip.
		mycmakeargs+=( -DAMDGPU_TARGETS="$(get_amdgpu_flags)" )
	fi

	cmake_src_configure
}

# Set up the GPU toolchain environment and sandbox allowances, then build.
src_compile() {
	if use cuda; then
		# nvcc rejects gcc newer than CUDA supports; cuda_gccdir picks a
		# compatible slot. The CUDA backend is built by a nested CMake project
		# (ExternalProject) during this phase, so the host-compiler choice and
		# the device-node sandbox allowances must be in effect here, not in
		# src_configure. CMake reads CUDAHOSTCXX into CMAKE_CUDA_HOST_COMPILER.
		local -x CUDAHOSTCXX
		CUDAHOSTCXX="$(cuda_gccdir)"
		cuda_add_sandbox -w
		addpredict "/dev/char/"
	fi

	if use rocm; then
		# ggml-hip is built by a nested CMake project that uses
		# enable_language(HIP); CMake needs the ROCm clang as the HIP compiler
		# (the hipcc wrapper isn't recognized). Point HIPCXX at the clang ROCm
		# itself uses, leaving CC/CXX as gcc for the CPU backend and Go cgo.
		# Pre-seed the device list so the HIP compiler's GPU enumeration doesn't
		# reach /dev/kfd inside the sandbox (no GPU present at build time).
		local hipclangpath
		hipclangpath="$(hipconfig --hipclangpath 2>/dev/null)" || die "hipconfig failed"
		[[ -x ${hipclangpath}/clang++ ]] || die "ROCm clang not found at ${hipclangpath}"
		local -x HIPCXX="${hipclangpath}/clang++"
		local -x HIP_PATH="${ESYSROOT}/usr"
		local -x ROCM_TARGET_LST="${T}/rocm_targets.lst"

		# AMDGPU_TARGETS is a USE_EXPAND variable, i.e. one space-separated
		# string rather than a bash array, so a quoted "${AMDGPU_TARGETS[@]}"
		# would emit every target on a single line. Expand it unquoted on
		# purpose so word splitting yields one target per line ([*] keeps this
		# correct even if the variable is ever turned into an array).
		# NOTE(review): one-target-per-line is the layout ROCm's target
		# enumeration is expected to read; confirm against the ROCm version in
		# use.
		# shellcheck disable=SC2048,SC2086
		printf '%s\n' ${AMDGPU_TARGETS[*]} > "${ROCM_TARGET_LST}" || die

		addpredict /dev/kfd
		addpredict /dev/dri
	fi

	cmake_src_compile
}

# Install the staged payload plus the OpenRC and systemd service files.
src_install() {
	# NB: not cmake_src_install. That runs `cmake --build --target install`,
	# which re-enters the BUILD_ALWAYS llama-server ExternalProjects; since each
	# carries an absolute CMAKE_INSTALL_PREFIX (the superbuild staging dir), the
	# re-run drops a duplicate payload at ${D}/ once DESTDIR is set.
	# Running the top-level install script directly installs the Go binary and
	# the already-staged lib/ollama payload without re-entering the sub-builds.
	DESTDIR="${D}" cmake --install "${BUILD_DIR}" || die

	newinitd "${FILESDIR}"/ollama.init "${PN}"
	newconfd "${FILESDIR}"/ollama.confd "${PN}"

	systemd_dounit "${FILESDIR}"/ollama.service
}

# Create the log directory, restricted to the ollama user and group.
pkg_preinst() {
	keepdir /var/log/ollama
	fperms 750 /var/log/ollama
	fowners "${PN}:${PN}" /var/log/ollama
}

# Print usage hints; the quick guide is shown on first install only.
pkg_postinst() {
	if [[ -z ${REPLACING_VERSIONS} ]]; then
		einfo "Quick guide:"
		einfo "  ollama serve"
		einfo "  ollama run llama3"
		einfo
		einfo "See available models at https://ollama.com/library"
	fi

	einfo
	einfo "Ollama binds 127.0.0.1 port 11434 by default."
	einfo "Change the bind address with the OLLAMA_HOST environment variable."
	einfo "See https://docs.ollama.com/faq for more info"
	einfo

	if use cuda; then
		einfo "USE=cuda builds the GPU backend for the GPU present at build time"
		einfo "(CMAKE_CUDA_ARCHITECTURES defaults to 'native'). Set CUDAARCHS to"
		einfo "override. The ${PN} user must be in the video group to see devices;"
		einfo "acct-user/${PN}[cuda] arranges this."
	fi
}