# Copyright 1999-2026 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2

EAPI=8

# supports ROCM/HIP >=5.5, but we define 7.0 to match the rest of the overlay.
# The value is the version floor for the rocm? dependencies further down.
# NOTE(review): rocm.eclass documents ROCM_VERSION as a pre-inherit variable,
# so keep this assignment above the inherit line.
ROCM_VERSION="7.0"

# Eclasses and what this ebuild takes from each:
#   cuda         - cuda_gccdir, cuda_add_sandbox (src_compile)
#   rocm         - get_amdgpu_flags (src_configure)
#   cmake        - cmake_src_prepare / cmake_src_configure / cmake_src_compile
#   flag-o-matic - strip-unsupported-flags, test-flags-HIPCXX (src_unpack)
#   go-module    - go-module_src_unpack (src_unpack)
#   linux-info   - kernel config checks (pkg_setup)
#   systemd      - systemd_dounit (src_install)
inherit cuda rocm cmake flag-o-matic go-module linux-info systemd

# Upstream's CMake superbuild (cmake/local.cmake) builds the GGML/llama.cpp
# inference backends by fetching a pinned llama.cpp via ExternalProject /
# FetchContent. The pinned revision lives in the LLAMA_CPP_VERSION file in the
# ollama source tree; we prestage that exact tree as a distfile and point the
# build at it through FETCHCONTENT_SOURCE_DIR_LLAMA_CPP so the build never
# touches the network. Re-check on every bump: `cat LLAMA_CPP_VERSION` in the
# matching ollama tag. Last verified 2026-06-18 (ollama 0.30.10 -> llama.cpp
# b9672).
# Despite the variable name, the value is used as a llama.cpp *tag* name: the
# SRC_URI entry below downloads it from archive/refs/tags/.
LLAMACPP_COMMIT="b9672"

DESCRIPTION="Get up and running with Llama 3, Mistral, Gemma, and other language models"
HOMEPAGE="https://ollama.com"

# Map the Gentoo release-candidate suffix (_rc) onto the -rc spelling used in
# the upstream tag and distfile names below.
MY_PV="${PV/_rc/-rc}"
MY_P="${PN}-${MY_PV}"
# Three distfiles: the ollama source, the Go module dependency tarball, and the
# prestaged llama.cpp tree (renamed so the distfile name is unique to ${PN}).
SRC_URI="
	https://github.com/ollama/${PN}/archive/refs/tags/v${MY_PV}.tar.gz -> ${MY_P}.gh.tar.gz
	https://github.com/gentoo-golang-dist/${PN}/releases/download/v${MY_PV}/${MY_P}-deps.tar.xz
	https://github.com/ggml-org/llama.cpp/archive/refs/tags/${LLAMACPP_COMMIT}.tar.gz
		-> ${PN}-llama.cpp-${LLAMACPP_COMMIT}.tar.gz
"
# Top-level directory of the unpacked ollama source.
S="${WORKDIR}/${PN}-${MY_PV}"
# Top-level directory of the unpacked llama.cpp archive; src_prepare patches
# it and src_configure hands it to the superbuild.
LLAMACPP_S="${WORKDIR}/llama.cpp-${LLAMACPP_COMMIT}"

LICENSE="MIT"
SLOT="0"
KEYWORDS="~amd64"

# cuda  -> cuda_v13 llama-server backend (this overlay tracks CUDA 13.x;
#          cuda_v12 is intentionally not wired).
# rocm  -> rocm_v7_2 (Linux) llama-server backend.
# vulkan-> vulkan llama-server backend.
# The CPU backend (all microarch variants) is always built and the right one
# is dlopen'd at runtime, so no cpu_flags_x86 USE flags are needed.
IUSE="cuda rocm vulkan"

# rocm.eclass contributes the amdgpu_targets_* flags. src_configure forwards
# the selection to the build as AMDGPU_TARGETS, so insist on at least one
# target whenever the ROCm backend is requested instead of silently passing an
# empty list down.
REQUIRED_USE="rocm? ( ${ROCM_REQUIRED_USE} )"

# Upstream tests pull models from the network; the dependency tarball is a
# GitHub release asset that must not be mirrored.
RESTRICT="mirror test"

# Libraries the GPU backends link against: required in the build sysroot and
# again at run time, hence shared between DEPEND and RDEPEND.
# The Vulkan loader belongs here rather than in RDEPEND alone: the Vulkan
# backend is linked against libvulkan, and an RDEPEND-only entry is not
# guaranteed to be installed before the build starts.
CDEPEND="
	cuda? ( dev-util/nvidia-cuda-toolkit:= )
	rocm? (
		>=dev-util/hip-${ROCM_VERSION}:=
		>=sci-libs/hipBLAS-${ROCM_VERSION}:=
		>=sci-libs/rocBLAS-${ROCM_VERSION}:=
	)
	vulkan? ( media-libs/vulkan-loader )
"
# Headers are compiled against for the target, so they are a DEPEND, not a
# BDEPEND.
DEPEND="
	${CDEPEND}
	vulkan? ( dev-util/vulkan-headers )
"
# Tools executed on the build host: the Go toolchain and the shader compiler
# shipped by shaderc.
BDEPEND="
	>=dev-lang/go-1.26.0
	vulkan? ( media-libs/shaderc )
"
# The service runs as the dedicated ${PN} user/group; the [cuda?] USE
# dependency is what puts that user into the video group (see pkg_postinst).
RDEPEND="
	${CDEPEND}
	acct-group/${PN}
	>=acct-user/${PN}-3[cuda?]
"

# Version-specific patches shipped in files/ (named after ${P}, so each bump
# needs its own copies); src_prepare hands them to cmake_src_prepare.
# NOTE(review): purposes below are inferred from the file names only; confirm
# against the patch headers.
#   unbundle-gpu-runtime-libs: presumably stops the install from bundling
#                              vendor GPU runtime libraries.
#   rocm-no-parallel-jobs:     presumably disables parallel jobs in the ROCm
#                              sub-build.
PATCHES=(
	"${FILESDIR}/${P}-unbundle-gpu-runtime-libs.patch"
	"${FILESDIR}/${P}-rocm-no-parallel-jobs.patch"
)

# Warn early when the running/configured kernel lacks the option the ROCm
# backend needs. Purely advisory: nothing here aborts the merge.
pkg_setup() {
	# Only the ROCm backend has a kernel requirement.
	use rocm || return 0

	linux-info_pkg_setup

	# Without a locatable kernel tree and its config there is nothing to check.
	linux-info_get_any_version && linux_config_exists || return 0

	linux_chkconfig_present HSA_AMD_SVM ||
		ewarn "To use ROCm/HIP, you need to have HSA_AMD_SVM option enabled in your kernel."
}

# Sanitise compiler flags for ROCm (when enabled) and unpack all distfiles.
src_unpack() {
	local hip_safe_cxxflags

	if use rocm; then
		# Some LTO flags are rejected by ROCm/HIP. They have to be gone before
		# the Go environment snapshots the flags into CGO_*. 963401
		strip-unsupported-flags
		hip_safe_cxxflags="$(test-flags-HIPCXX "${CXXFLAGS}")"
		export CXXFLAGS="${hip_safe_cxxflags}"
	fi

	# One call handles everything: the ollama source, the prestaged llama.cpp
	# tree, and the Go dependency tarball (which lands in GOMODCACHE).
	go-module_src_unpack
}

# Apply the ebuild/user patches, then the upstream llama.cpp compat patch set,
# then point the Go binary at the multilib-aware payload directory.
src_prepare() {
	cmake_src_prepare

	# The compat layer (llama/compat) links Ollama-owned sources into the
	# llama.cpp targets AND patches call-sites into llama.cpp itself. Upstream
	# applies that patch set only when it fetches llama.cpp; because we prestage
	# the source and pass FETCHCONTENT_SOURCE_DIR_LLAMA_CPP (which forces
	# OLLAMA_LLAMA_CPP_SKIP_COMPAT_PATCH=ON in the sub-build), we must apply it
	# ourselves. Mirror apply-patch.cmake: every llama/compat/**/*.patch, in
	# basename-numeric order (001-hooks, 002-ui, models/003-laguna, ...). The
	# models/*.patch add not-yet-upstreamed architectures whose .cpp are linked
	# into the llama target, so the whole set is required — a non-recursive glob
	# silently drops them and the linked sources then fail to compile.
	#
	# Collect the list first instead of streaming it into the loop: a failing
	# find (e.g. upstream moved the directory) inside a process substitution is
	# otherwise invisible and would leave llama.cpp unpatched without any
	# error. LC_ALL=C pins the collation used for the basename ordering.
	local -a compat_patches
	mapfile -t compat_patches < <(
		find "${S}"/llama/compat -name '*.patch' -printf '%f\t%p\n' \
			| LC_ALL=C sort | cut -f2-
	)
	(( ${#compat_patches[@]} > 0 )) \
		|| die "no llama.cpp compat patches found under ${S}/llama/compat"

	# eapply takes the files in the order given; patch paths are relative to
	# the llama.cpp tree, so apply from there.
	pushd "${LLAMACPP_S}" >/dev/null || die
	eapply "${compat_patches[@]}"
	popd >/dev/null || die

	# The Go binary resolves its runtime payload at exeDir/../lib/ollama
	# (ml/path.go). We install to $(get_libdir)/ollama (lib64 on multilib), so
	# teach the binary to look there. No-op on lib (32-bit) layouts.
	sed -i -e "s/\"lib\", \"ollama\"/\"$(get_libdir)\", \"ollama\"/g" \
		ml/path.go || die "libdir sed failed"
}

# Configure the CMake superbuild: select GPU backends from USE flags, point it
# at the prestaged llama.cpp tree, and forward the ROCm GPU architectures.
src_configure() {
	# USE flag -> upstream backend name, kept in the order they are passed on.
	local pair backends=()
	for pair in cuda:cuda_v13 rocm:rocm_v7_2 vulkan:vulkan; do
		use "${pair%%:*}" && backends+=( "${pair#*:}" )
	done

	# CMake lists are ';'-separated; emit "a;b;c" (empty when no GPU backend).
	local backend_list
	printf -v backend_list '%s;' "${backends[@]}"
	backend_list="${backend_list%;}"

	local mycmakeargs=(
		-DOLLAMA_VERSION="${PV}"
		-DOLLAMA_LIB_DIR="$(get_libdir)/ollama"
		# Use the unpacked llama.cpp tree so FetchContent stays offline.
		-DFETCHCONTENT_SOURCE_DIR_LLAMA_CPP="${LLAMACPP_S}"
		-DGGML_CCACHE=OFF
		-DOLLAMA_LLAMA_BACKENDS="${backend_list}"
	)

	# With the GPU arch(s) supplied, the superbuild selects the
	# rocm_v7_2_user_arch preset and hands AMDGPU_TARGETS on to ggml-hip.
	use rocm && mycmakeargs+=( -DAMDGPU_TARGETS="$(get_amdgpu_flags)" )

	cmake_src_configure
}

# Build the superbuild. The GPU backends are compiled by nested CMake projects
# during this phase, so their compiler selection and sandbox allowances are
# set up here rather than in src_configure.
src_compile() {
	if use cuda; then
		# nvcc rejects gcc newer than CUDA supports; cuda_gccdir picks a
		# compatible slot. The CUDA backend is built by a nested CMake project
		# (ExternalProject) during this phase, so the host-compiler choice and
		# the device-node sandbox allowances must be in effect here, not in
		# src_configure. CMake reads CUDAHOSTCXX into CMAKE_CUDA_HOST_COMPILER.
		local -x CUDAHOSTCXX
		CUDAHOSTCXX="$(cuda_gccdir)"
		cuda_add_sandbox -w
		addpredict "/dev/char/"
	fi

	if use rocm; then
		# ggml-hip is built by a nested CMake project that uses
		# enable_language(HIP); CMake needs the ROCm clang as the HIP compiler
		# (the hipcc wrapper isn't recognized). Point HIPCXX at the clang ROCm
		# itself uses, leaving CC/CXX as gcc for the CPU backend and Go cgo.
		local hipclangpath
		hipclangpath="$(hipconfig --hipclangpath 2>/dev/null)" || die "hipconfig failed"
		[[ -x ${hipclangpath}/clang++ ]] || die "ROCm clang not found at ${hipclangpath}"
		local -x HIPCXX="${hipclangpath}/clang++"
		local -x HIP_PATH="${ESYSROOT}/usr"

		# Pre-seed the device list so the HIP compiler's GPU enumeration doesn't
		# reach /dev/kfd inside the sandbox (no GPU present at build time).
		# The list file holds one target per line. AMDGPU_TARGETS is a
		# USE_EXPAND value, i.e. a single space-separated string, so it must be
		# word-split: quoting it as an array would write every target onto one
		# line and break the list as soon as more than one target is enabled.
		local -x ROCM_TARGET_LST="${T}/rocm_targets.lst"
		# shellcheck disable=SC2048,SC2086
		printf '%s\n' ${AMDGPU_TARGETS[*]} > "${ROCM_TARGET_LST}" || die
		addpredict /dev/kfd
		addpredict /dev/dri
	fi

	cmake_src_compile
}

# Install the staged build output, the package documentation, and the service
# integration files (OpenRC init/conf.d and the systemd unit).
src_install() {
	# NB: not cmake_src_install. That runs `cmake --build --target install`,
	# which re-enters the BUILD_ALWAYS llama-server ExternalProjects; since each
	# carries an absolute CMAKE_INSTALL_PREFIX (the superbuild staging dir), the
	# re-run drops a duplicate payload at ${D}/<buildpath> once DESTDIR is set.
	# Running the top-level install script directly installs the Go binary and
	# the already-staged lib/ollama payload without re-entering the sub-builds.
	DESTDIR="${D}" cmake --install "${BUILD_DIR}" || die

	# cmake_src_install would also have installed the standard docs (README*
	# etc. from ${S}); do that by hand since the eclass phase is bypassed.
	einstalldocs

	newinitd "${FILESDIR}"/ollama.init "${PN}"
	newconfd "${FILESDIR}"/ollama.confd "${PN}"
	systemd_dounit "${FILESDIR}"/ollama.service
}

# Create the log directory for the service inside the image, private to the
# ${PN} account (mode 750, owner and group ${PN}).
#
# NOTE(review): keepdir/fperms/fowners are install helpers that are normally
# called from src_install. Presumably they were deferred to pkg_preinst because
# the ${PN} user and group come from RDEPEND only and may not exist yet while
# src_install runs — confirm before moving them.
pkg_preinst() {
	# Keep the (otherwise empty) directory from being pruned.
	keepdir /var/log/ollama
	# Not world-readable.
	fperms 750 /var/log/ollama
	# Needs the ${PN} user and group to be resolvable at this point.
	fowners "${PN}:${PN}" /var/log/ollama
}

# Print post-install hints: a quick-start on first install, the default bind
# address on every install, and CUDA notes when that backend is enabled.
pkg_postinst() {
	local line

	# Shown only on a fresh install (nothing is being replaced).
	local -a quick_start=(
		"Quick guide:"
		"  ollama serve"
		"  ollama run llama3"
		""
		"See available models at https://ollama.com/library"
	)
	# Shown on every install and upgrade; blank lines frame the block.
	local -a bind_notes=(
		""
		"Ollama binds 127.0.0.1 port 11434 by default."
		"Change the bind address with the OLLAMA_HOST environment variable."
		"See https://docs.ollama.com/faq for more info"
		""
	)
	# Shown only when the CUDA backend was built.
	local -a cuda_notes=(
		"USE=cuda builds the GPU backend for the GPU present at build time"
		"(CMAKE_CUDA_ARCHITECTURES defaults to 'native'). Set CUDAARCHS to"
		"override. The ${PN} user must be in the video group to see devices;"
		"acct-user/${PN}[cuda] arranges this."
	)

	if [[ ! ${REPLACING_VERSIONS} ]]; then
		for line in "${quick_start[@]}"; do
			einfo "${line}"
		done
	fi

	for line in "${bind_notes[@]}"; do
		einfo "${line}"
	done

	if use cuda; then
		for line in "${cuda_notes[@]}"; do
			einfo "${line}"
		done
	fi
}