# Copyright 2026 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2

EAPI=8

# LLVM slots this ebuild accepts; consumed by the llvm-r2 eclass inherited below.
LLVM_COMPAT=( {18..22} )

# git-r3 makes this a live ebuild: sources are fetched from EGIT_REPO_URI.
inherit cmake git-r3 llvm-r2 systemd toolchain-funcs

DESCRIPTION="Official inference framework for 1-bit LLMs"
HOMEPAGE="https://github.com/microsoft/BitNet"
EGIT_REPO_URI="https://github.com/microsoft/BitNet.git"

LICENSE="MIT"
SLOT="0"
# openmp is enabled by default. cpu_flags_arm_neon selects the codegen script
# in bitnet_run_codegen(); both cpu_flags_* flags are mapped to CMake options
# in src_configure().
IUSE="+openmp cpu_flags_x86_avx2 cpu_flags_arm_neon"

# OpenMP runtime, only pulled in with USE=openmp.
DEPEND="
	openmp? ( llvm-runtimes/openmp:= )
"
# NOTE(review): acct-user/ollama is presumably the account the installed
# bitnet.service runs as -- the unit file is not visible here; confirm.
RDEPEND="
	${DEPEND}
	acct-user/ollama
"
# clang from the selected LLVM slot is the compiler set up in src_configure().
# NOTE(review): bitnet_run_codegen() invokes python3 at build time, but no
# Python dependency or Python eclass is declared here -- confirm whether
# python-any-r1 should be used.
BDEPEND="
	$(llvm_gen_dep '
		llvm-core/clang:${LLVM_SLOT}
	')
"

bitnet_run_codegen() {
	# Generate bitnet-lut-kernels.h before the build. The forked llama.cpp
	# references that header unconditionally; the generated content holds
	# arch-specific optimized kernels guarded by preprocessor conditionals.
	#
	# Must be run from the top of the source tree: the script paths below
	# are relative.

	# TL2 generator by default, TL1 generator when NEON is requested.
	local generator=utils/codegen_tl2.py
	use cpu_flags_arm_neon && generator=utils/codegen_tl1.py

	# Fixed model/tiling parameters handed to whichever generator is chosen.
	local -a kernel_opts=(
		--model bitnet_b1_58-3B
		--BM 160,320,320
		--BK 96,96,96
		--bm 32,32,32
	)

	# NOTE(review): python3 is resolved from PATH; no Python eclass is
	# inherited by this ebuild, so the interpreter is not a declared
	# build dependency -- confirm.
	python3 "${generator}" "${kernel_opts[@]}" || die "codegen failed"
}

src_prepare() {
	# Fix const-correctness for clang >= 21.
	#
	# The substitution is guarded by a negated address so that lines which
	# already carry the qualifier are left alone. Without the guard the
	# pattern also matches inside "const int8_t * y_col = y" and would
	# produce "const const int8_t ...". This is a live (git-r3) ebuild, so
	# upstream may apply the same fix at any time.
	sed -i \
		-e '/const int8_t \* y_col = y/!s/int8_t \* y_col = y/const int8_t * y_col = y/' \
		src/ggml-bitnet-mad.cpp || die

	# Standard cmake eclass preparation, run after the source edit above.
	cmake_src_prepare

	# Generate the kernel header the forked llama.cpp expects.
	bitnet_run_codegen
}

src_configure() {
	# Build with clang from the selected LLVM slot. clang is a BDEPEND, so
	# it lives in BROOT: ask llvm-r2 for the build-root prefix (-b). The
	# default (-d) resolves against ESYSROOT, which is the wrong place to
	# look for an executable whenever BROOT and ESYSROOT differ.
	local llvm_bindir="$(get_llvm_prefix -b)/bin"
	CC="${llvm_bindir}/clang"
	CXX="${llvm_bindir}/clang++"
	tc-export CC CXX

	# Map the CPU USE flags onto the project's kernel options and pass a
	# constant BUILD_NUMBER.
	# NOTE(review): USE=openmp is not forwarded to CMake here, so OpenMP
	# usage follows whatever the project's CMake default is -- confirm
	# whether an explicit option should be passed.
	local mycmakeargs=(
		-DBITNET_X86_TL2="$(usex cpu_flags_x86_avx2 ON OFF)"
		-DBITNET_ARM_TL1="$(usex cpu_flags_arm_neon ON OFF)"
		-DBUILD_NUMBER=1
	)
	cmake_src_configure
}

src_install() {
	# The build produces llama-cli and llama-server; install them under
	# bitnet-* names instead.
	local tool
	for tool in cli server; do
		newbin "${BUILD_DIR}/bin/llama-${tool}" "bitnet-${tool}"
	done

	# Helper scripts from the source tree go to /usr/share/${PN}.
	insinto "/usr/share/${PN}"
	doins run_inference.py setup_env.py

	# systemd unit shipped in the ebuild's files/ directory.
	systemd_dounit "${FILESDIR}/bitnet.service"

	# Keep the (initially empty) model directory in the installed image.
	keepdir /var/lib/bitnet/models

	dodoc README.md
}