# Copyright 1999-2024 Gentoo Authors # Distributed under the terms of the GNU General Public License v2 EAPI=8 AMDGPU_TARGETS_COMPAT=( gfx1030 gfx1031 gfx1032 # gfx1033 gfx1034 gfx1035 # gfx1036 gfx1100 gfx1101 gfx1102 # gfx1103 ) CUDA_TARGETS_COMPAT=( sm_30 sm_50 sm_60 sm_61 sm_70 sm_75 sm_80 sm_86 sm_90 ) ROCM_VERSION="5.7.1" PYTHON_COMPAT=( python3_{10..12} ) inherit cmake cuda flag-o-matic llvm python-single-r1 rocm toolchain-funcs DESCRIPTION="Intel(R) Open Image Denoise library" HOMEPAGE="http://www.openimagedenoise.org/" LICENSE="Apache-2.0" # MKL_DNN is oneDNN 2.2.4 with additional custom commits. COMPOSABLE_KERNEL_COMMIT="e85178b4ca892a78344271ae64103c9d4d1bfc40" CUTLASS_COMMIT="66d9cddc832c1cdc2b30a8755274f7f74640cfe6" MKL_DNN_COMMIT="9bea36e6b8e341953f922ce5c6f5dbaca9179a86" OIDN_WEIGHTS_COMMIT="4322c25e25a05584f65da1a4be5cef40a4b2e90b" ORG_GH="https://github.com/OpenImageDenoise" # SSE4.1 hardware was released in 2008. # See scripts/build.py for release versioning. # Clang is more smoother multitask-wise. MIN_CLANG_PV="3.3" MIN_GCC_PV="4.8.1" SLOT="0/$(ver_cut 1-2)" LLVM_MAX_SLOT=18 IUSE=" -${CUDA_TARGETS_COMPAT[@]/#/cuda_targets_} -${AMDGPU_TARGETS_COMPAT[@]/#/amdgpu_targets_} examples +built-in-weights +cpu -cuda doc -hip static-libs sycl " gen_required_use_cuda_targets() { local x for x in ${CUDA_TARGETS_COMPAT[@]} ; do echo " cuda_targets_${x}? ( cuda ) " done } gen_required_use_hip_targets() { local x for x in ${AMDGPU_TARGETS_COMPAT[@]} ; do echo " amdgpu_targets_${x}? ( hip ) " done } REQUIRED_USE+=" $(gen_required_use_cuda_targets) $(gen_required_use_hip_targets) ${PYTHON_REQUIRED_USE} cuda? ( || ( ${CUDA_TARGETS_COMPAT[@]/#/cuda_targets_} ) ) hip? ( ${ROCM_REQUIRED_USE} ) " HIP_VERSIONS=( "5.7.1" "5.6.1" ) # 5.3.0 fails gen_hip_depends() { local hip_version for hip_version in ${HIP_VERSIONS[@]} ; do # Needed because of build failures local s=$(ver_cut 1-2 ${hip_version}) echo " ( ~dev-libs/rocm-comgr-${hip_version} ~dev-libs/rocm-device-libs-${hip_version} ~dev-libs/rocr-runtime-${hip_version} ~dev-libs/roct-thunk-interface-${hip_version} ~dev-util/hip-${hip_version} ~dev-util/rocminfo-${hip_version} ) " done } # See https://github.com/OpenImageDenoise/oidn/blob/v1.4.3/scripts/build.py RDEPEND+=" ${PYTHON_DEPS} virtual/libc cuda? ( >=dev-util/nvidia-cuda-toolkit-11.8:= ) hip? ( || ( $(gen_hip_depends) ) dev-util/hip:= ) >=dev-cpp/tbb-2021.5:0= " DEPEND+=" ${RDEPEND} " BDEPEND+=" ${PYTHON_DEPS} >=dev-lang/ispc-1.17.0 >=dev-build/cmake-3.15 examples? ( >=media-libs/openimageio-2.4.15.0[${PYTHON_SINGLE_USEDEP}] ) cuda? ( >=dev-util/nvidia-cuda-toolkit-11.8 ) hip? ( || ( $(gen_hip_depends) ) ) /dev/null 1>/dev/null \ || die "Switch to a c++17 compatible compiler." # Prevent possible # error: Illegal instruction detected: Operand has incorrect register class. replace-flags '-O0' '-O1' export CC=$(tc-getCC) export CXX=$(tc-getCXX) einfo einfo "CC:\t${CC}" einfo "CXX:\t${CXX}" einfo "CHOST:\t${CHOST}" einfo mycmakeargs+=( -DOIDN_APPS=$(usex examples) -DOIDN_INSTALL_DEPENDENCIES=OFF -DOIDN_FILTER_RT=$(usex built-in-weights) -DOIDN_FILTER_RTLIGHTMAP=$(usex built-in-weights) -DOIDN_DEVICE_CPU=$(usex cpu) -DOIDN_DEVICE_CUDA=$(usex cuda) -DOIDN_DEVICE_HIP=$(usex hip) -DOIDN_DEVICE_SYCL=$(usex sycl) ) if use hip ; then local llvm_slot=$(grep -e "HIP_CLANG_ROOT.*/lib/llvm" \ "/usr/$(get_libdir)/cmake/hip/hip-config.cmake" \ | grep -E -o -e "[0-9]+") [[ -n "${llvm_slot}" ]] && "HIP_CLANG_ROOT is missing. emerge hip." einfo "${ESYSROOT}/usr/lib/llvm/${llvm_slot}" mycmakeargs+=( -DHIP_COMPILER_PATH="${ESYSROOT}/usr/lib/llvm/${llvm_slot}" -DOIDN_DEVICE_HIP_COMPILER="${CXX}" ) mycmakeargs+=( -DGPU_TARGETS="$(get_amdgpu_flags)" ) einfo "AMDGPU_TARGETS:\t${targets}" fi if use cuda ; then for CT in ${CUDA_TARGETS_COMPAT[@]}; do use ${CT/#/cuda_targets_} && CUDA_TARGETS+="-gencode arch=compute_${CT#sm_*},code=${CT} " done ( use cuda_targets_sm_80 || use cuda_targets_sm_90 ) && \ sed -e 's/cuda_engine.cu/&\n cutlass_conv_sm80.cu/' -i devices/cuda/CMakeLists.txt use cuda_targets_sm_75 && sed -e 's/cuda_engine.cu/&\n cutlass_conv_sm75.cu/' -i devices/cuda/CMakeLists.txt use cuda_targets_sm_70 && sed -e 's/cuda_engine.cu/&\n cutlass_conv_sm70.cu/' -i devices/cuda/CMakeLists.txt sed -e "33i\set(OIDN_NVCC_FLAGS \"-forward-unknown-opts -fno-lto ${NVCCFLAGS//\"/} ${CUDA_TARGETS%% }\")" -i devices/cuda/CMakeLists.txt || die "Sed failed" mycmakeargs+=( -DCUDAToolkit_ROOT="/opt/cuda" -DOIDN_DEVICE_CUDA_COMPILER="/opt/cuda/bin/nvcc" -DCUDA_COMPILER_PATH="$(cuda_gccdir)" -DCMAKE_CUDA_HOST_COMPILER="$(cuda_gccdir)\/g++" ) einfo "CUDA_TARGETS:\t${CUDA_TARGETS//*=/}" einfo "NVCCFLAGS:\t${NVCCFLAGS}" fi cmake_src_configure } src_install() { cmake_src_install if ! use doc ; then rm -vrf "${ED}/usr/share/doc/oidn-${PV}/readme.pdf" || die fi use doc && einstalldocs docinto licenses dodoc LICENSE.txt # Generated when hip is enabled. rm -rf "${ED}/var" } pkg_postinst() { if use hip ; then ewarn ewarn "All APU + GPU HIP targets on the device must be built/installed to avoid" ewarn "a crash." ewarn fi }