# Copyright 2022-2026 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2

# CUDA-slotted variant — installs to /opt/pytorch-cuda/

EAPI=8

PYTHON_COMPAT=( python3_{11..14} )
# NOTE(review): no ROCm eclass is inherited below, so this looks unused —
# confirm before dropping it.
ROCM_VERSION=6.1
inherit python-single-r1 cmake cuda flag-o-matic prefix

MYPN=pytorch
MYP=${MYPN}-${PV}

# flash-attention sources are fetched separately and moved into third_party/
# by src_prepare when USE=flash or USE=memefficient is enabled.
FLASH_PV=2.7.4
FLASH_PN=flash-attention
FLASH_P=${FLASH_PN}-${FLASH_PV}
FLASH_ATT_URI="https://github.com/Dao-AILab/${FLASH_PN}/archive/refs/tags/v${FLASH_PV}.tar.gz -> ${FLASH_P}.gh.tar.gz"

DESCRIPTION="A deep learning framework (CUDA backend)"
HOMEPAGE="https://pytorch.org/"
SRC_URI="
	https://github.com/pytorch/${MYPN}/archive/refs/tags/v${PV}.tar.gz -> ${MYP}.tar.gz
	flash? ( ${FLASH_ATT_URI} )
	memefficient? ( ${FLASH_ATT_URI} )
"

S="${WORKDIR}"/${MYP}

LICENSE="BSD"
SLOT="cuda"
KEYWORDS="~amd64"
IUSE="cusparselt distributed fbgemm flash gloo kineto memefficient mimalloc mkl mpi nccl nnpack +numpy onednn openblas opencl openmp qnnpack xnnpack"
RESTRICT="test"
REQUIRED_USE="
	${PYTHON_REQUIRED_USE}
	mpi? ( distributed )
	gloo? ( distributed )
	cusparselt? ( cuda )
	flash? ( cuda )
	memefficient? ( cuda )
"

# Force CUDA, no ROCm
IUSE+=" +cuda"

RDEPEND="
	${PYTHON_DEPS}
	dev-cpp/abseil-cpp:=
	dev-cpp/gflags:=
	>=dev-cpp/glog-0.5.0:=
	>=dev-libs/cpuinfo-2025.11.14
	dev-libs/libfmt:=
	dev-libs/protobuf:=
	dev-libs/sleef
	sci-ml/onnx
	virtual/lapack
	dev-libs/cudnn
	>=sci-ml/cudnn-frontend-1.12.0:=
	>=dev-util/nvidia-cuda-toolkit-12.9:=[profiler]
	cusparselt? ( dev-libs/cusparselt )
	fbgemm? ( >=sci-ml/FBGEMM-1.4 )
	gloo? ( >=sci-ml/gloo-2025.06.04[cuda] )
	kineto? ( ~sci-ml/kineto-0.4.0_p20260323 )
	mimalloc? ( dev-libs/mimalloc )
	mpi? ( virtual/mpi )
	nnpack? (
		sci-ml/NNPACK
		dev-libs/pthreadpool
	)
	numpy? ( $(python_gen_cond_dep '
		dev-python/numpy[${PYTHON_USEDEP}]
		') )
	onednn? ( sci-ml/oneDNN )
	opencl? ( virtual/opencl )
	qnnpack? (
		!sci-libs/QNNPACK
		sci-ml/gemmlowp
		dev-libs/pthreadpool
	)
	distributed? (
		sci-ml/tensorpipe[cuda]
		dev-cpp/cpp-httplib:=
	)
	xnnpack? (
		>=sci-ml/XNNPACK-2024.11
		dev-libs/pthreadpool
	)
	mkl? ( sci-libs/mkl )
	openblas? ( sci-libs/openblas )
"

# The python_gen_cond_dep group below was previously left unterminated (no
# body, no closing quote/paren), which made the whole file unparsable.
# pybind11 is required because src_configure passes -DUSE_SYSTEM_PYBIND11=ON.
# NOTE(review): pyyaml and typing-extensions are presumed to be needed by the
# build-time code generators — confirm against the main-tree ebuild.
DEPEND="
	${RDEPEND}
	dev-cpp/nlohmann_json
	dev-libs/flatbuffers
	dev-libs/FXdiv
	dev-libs/pocketfft
	dev-libs/psimd
	sci-ml/FP16
	$(python_gen_cond_dep '
		dev-python/pybind11[${PYTHON_USEDEP}]
		dev-python/pyyaml[${PYTHON_USEDEP}]
		dev-python/typing-extensions[${PYTHON_USEDEP}]
	')
	=dev-libs/cutlass-3.9.2[tools(+)]
	onednn? ( sci-ml/ideep )
	qnnpack? ( dev-libs/clog )
"

PATCHES=(
	"${FILESDIR}"/${PN}-2.5.1-unbundle_fmt.patch
	"${FILESDIR}"/${PN}-2.5.1-unbundle_kineto.patch
	"${FILESDIR}"/${PN}-2.8.0-unbundle_pocketfft.patch
	"${FILESDIR}"/${PN}-2.5.1-cudnn_include_fix.patch
	"${FILESDIR}"/${PN}-2.4.0-cpp-httplib.patch
	"${FILESDIR}"/${PN}-2.5.1-glog-0.6.0.patch
	"${FILESDIR}"/${PN}-2.7.0-glog-0.7.1.patch
	"${FILESDIR}"/${PN}-2.9.1-torch_cpu.patch
	"${FILESDIR}"/${PN}-2.10.0-gentoo.patch
	"${FILESDIR}"/${PN}-2.11.0-mimalloc.patch
	"${FILESDIR}"/${P}-removekineto-pr178960.patch
)

# Prepare the source tree: place the flash-attention sources, strip references
# to bundled third-party code from the CMake files, apply PATCHES (through
# cmake_src_prepare), regenerate the flatbuffers header and prefix-ify paths.
src_prepare() {
	# Both attention backends consume the same flash-attention tarball.
	if use flash || use memefficient; then
		mv "${WORKDIR}"/${FLASH_P}/* third_party/${FLASH_PN}/ || die
	fi

	filter-lto

	# Drop the header-only fmt target name from the link lines.
	sed -i \
		-e 's|::fmt-header-only||' \
		c10/CMakeLists.txt \
		cmake/Dependencies.cmake \
		torch/CMakeLists.txt \
		|| die

	# Remove the compile-options call that references the tensorpipe target.
	sed -e '/target_compile_options_if_supported(tensorpipe/d' -i cmake/Dependencies.cmake || die

	# Do not descend into any bundled third_party subdirectory.
	sed -i \
		-e '/add_subdirectory.*third_party/d' \
		CMakeLists.txt \
		cmake/Dependencies.cmake \
		cmake/ProtoBuf.cmake \
		aten/src/ATen/CMakeLists.txt \
		|| die

	# Install exported targets into the ABI libdir instead of plain "lib".
	sed -i \
		-e "/EXPORT/s|DESTINATION lib)|DESTINATION $(get_libdir))|" \
		c10/cuda/CMakeLists.txt \
		c10/CMakeLists.txt \
		|| die

	sed -i 's/-Wextra-semi//' cmake/public/utils.cmake || die

	cmake_src_prepare

	# Regenerate the mobile bytecode header with the system flatc.
	pushd torch/csrc/jit/serialization > /dev/null || die
	flatc --cpp --gen-mutable --scoped-enums mobile_bytecode.fbs || die
	popd > /dev/null || die

	hprefixify \
		aten/CMakeLists.txt \
		caffe2/CMakeLists.txt \
		cmake/Metal.cmake \
		cmake/Modules/*.cmake \
		cmake/Modules_CUDA_fix/FindCUDNN.cmake \
		cmake/Modules_CUDA_fix/upstream/FindCUDA/make2cmake.cmake \
		cmake/Modules_CUDA_fix/upstream/FindPackageHandleStandardArgs.cmake \
		cmake/public/cuda.cmake \
		cmake/Dependencies.cmake \
		torch/CMakeLists.txt \
		CMakeLists.txt
}

# Configure the CMake build for a CUDA-only install under /opt/pytorch-cuda.
# Honours TORCH_CUDA_ARCH_LIST from the environment and falls back to
# "6.1 7.5" with a warning when it is unset or empty.
src_configure() {
	if [[ -z ${TORCH_CUDA_ARCH_LIST} ]]; then
		TORCH_CUDA_ARCH_LIST="6.1 7.5"
		ewarn "Using default TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST}"
		ewarn "Set TORCH_CUDA_ARCH_LIST in make.conf to customize."
	fi

	# NOTE(review): IUSE declares "nccl", but -DUSE_NCCL=OFF is passed
	# unconditionally below, so that flag currently has no effect.
	local mycmakeargs=(
		-DCMAKE_INSTALL_PREFIX="${EPREFIX}/opt/pytorch-cuda"
		-DBUILD_CUSTOM_PROTOBUF=OFF
		-DBUILD_TEST=OFF
		-DLIBSHM_INSTALL_LIB_SUBDIR="${EPREFIX}/opt/pytorch-cuda/$(get_libdir)"
		-DPython_EXECUTABLE="${PYTHON}"
		-DTORCH_INSTALL_LIB_DIR="${EPREFIX}/opt/pytorch-cuda/$(get_libdir)"
		-DUSE_CCACHE=OFF
		-DUSE_CUDA=ON
		-DUSE_ROCM=OFF
		-DUSE_DISTRIBUTED=$(usex distributed)
		-DUSE_FBGEMM=$(usex fbgemm)
		-DUSE_FLASH_ATTENTION=$(usex flash)
		-DUSE_GFLAGS=ON
		-DUSE_GLOG=ON
		-DUSE_GLOO=$(usex gloo)
		-DUSE_ITT=OFF
		-DUSE_KINETO=$(usex kineto)
		-DUSE_KLEIDIAI=OFF
		-DUSE_MAGMA=OFF
		-DUSE_MEM_EFF_ATTENTION=$(usex memefficient)
		-DUSE_MIMALLOC=$(usex mimalloc)
		-DUSE_MKLDNN=$(usex onednn)
		-DUSE_MPI=$(usex mpi)
		-DUSE_NCCL=OFF
		-DUSE_NNPACK=$(usex nnpack)
		-DUSE_NUMA=OFF
		-DUSE_NUMPY=$(usex numpy)
		-DUSE_OPENCL=$(usex opencl)
		-DUSE_OPENMP=$(usex openmp)
		-DUSE_PYTORCH_QNNPACK=$(usex qnnpack)
		-DUSE_PYTORCH_METAL=OFF
		-DUSE_SYSTEM_CPUINFO=ON
		-DUSE_SYSTEM_EIGEN_INSTALL=ON
		-DUSE_SYSTEM_FP16=ON
		-DUSE_SYSTEM_FXDIV=ON
		-DUSE_SYSTEM_GLOO=ON
		-DUSE_SYSTEM_NVTX=ON
		-DUSE_SYSTEM_ONNX=ON
		-DUSE_SYSTEM_PSIMD=ON
		-DUSE_SYSTEM_PTHREADPOOL=ON
		-DUSE_SYSTEM_PYBIND11=ON
		-DUSE_SYSTEM_SLEEF=ON
		-DUSE_SYSTEM_XNNPACK=$(usex xnnpack)
		-DUSE_TENSORPIPE=$(usex distributed)
		-DUSE_UCC=OFF
		-DUSE_VALGRIND=OFF
		-DUSE_XNNPACK=$(usex xnnpack)
		-DUSE_XPU=OFF
		-Wno-dev
		-DUSE_CUDNN=ON
		-DTORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST}"
		-DUSE_CUSPARSELT=$(usex cusparselt)
	)

	# BLAS backend: mkl takes precedence over openblas; otherwise generic.
	if use mkl; then
		mycmakeargs+=(-DBLAS=MKL)
	elif use openblas; then
		mycmakeargs+=(-DBLAS=OpenBLAS)
	else
		mycmakeargs+=(-DBLAS=Generic -DBLAS_LIBRARIES=)
	fi

	cuda_add_sandbox
	addpredict "/dev/char/"

	# cuda_gccdir -f output is quoted; strip the quotes before handing it
	# to CMake as the CUDA compiler flags.
	mycmakeargs+=(
		-DCMAKE_CUDA_FLAGS="$(cuda_gccdir -f | tr -d \")"
	)

	# Forward a user-selected CUDA compiler, if any, as PYTORCH_NVCC.
	[[ -v CUDACXX ]] && export PYTORCH_NVCC="${CUDACXX}"

	if use flash; then
		export FLASH_ATTENTION_FORCE_BUILD="TRUE"
		export FLASH_ATTN_CUDA_ARCHS="${CUDAARCHS:-${TORCH_CUDA_ARCH_LIST}}"
	fi

	if use onednn; then
		mycmakeargs+=(
			-DMKLDNN_FOUND=ON
			-DMKLDNN_LIBRARIES=dnnl
			-DMKLDNN_INCLUDE_DIR="${ESYSROOT}/usr/include/oneapi/dnnl"
		)
	fi

	cmake_src_configure
}

# Build with the PyTorch version variables set for the duration of the
# cmake_src_compile call.
src_compile() {
	PYTORCH_BUILD_VERSION=${PV} \
	PYTORCH_BUILD_NUMBER=0 \
	cmake_src_compile
}

# Install the staged python/torch directory into the private site-packages
# tree under /opt/pytorch-cuda and create the bin/, lib/ and include/
# sub-directories plus the include/torch symlink inside it.
python_install() {
	local sitedir="/opt/pytorch-cuda/lib/python${EPYTHON#python}/site-packages"

	insinto "${sitedir}"
	doins -r python/torch

	# insinto/doins operate relative to ${ED}; use the same root here so the
	# paths also exist when EPREFIX is non-empty (previously ${D}).
	local torchdir="${ED}${sitedir}/torch"
	mkdir "${torchdir}/bin" || die
	mkdir "${torchdir}/lib" || die
	mkdir "${torchdir}/include" || die

	# Relative to <sitedir>/torch/include/, five levels up is
	# /opt/pytorch-cuda, so this resolves to /opt/pytorch-cuda/include/torch.
	ln -s "../../../../../include/torch" \
		"${torchdir}/include/torch" || die
}

# Run the CMake install, keep CMakeCache.txt under /var/lib/${PN}-cuda, then
# stage a minimal python/torch directory (version.py only) and install it via
# python_install.
src_install() {
	cmake_src_install

	insinto "/var/lib/${PN}-cuda"
	doins "${BUILD_DIR}"/CMakeCache.txt

	# Start from a clean staging directory containing only version.py.
	rm -rf python
	mkdir -p python/torch || die
	cp torch/version.py python/torch/ || die

	python_install
}