From c79a2c525edce42523b35da3edb87ae937a6e5b1 Mon Sep 17 00:00:00 2001
From: Yulong Wang <7679871+fs-eire@users.noreply.github.com>
Date: Thu, 31 Jul 2025 15:02:37 -0700
Subject: [PATCH] [build] fix macOS x86_64 cross-compile warning

---
 cmake/CMakeLists.txt                            |  1 +
 cmake/onnxruntime_config.h.in                   |  1 +
 .../lib/sqnbitgemm_kernel_avx2_int8_blklen32.h  | 17 +++++++++++++++++
 3 files changed, 19 insertions(+)

diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index a76be16572a03..bdc18c424efd1 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -558,6 +558,7 @@ else()
   check_cxx_compiler_flag(-Wambiguous-reversed-operator HAS_AMBIGUOUS_REVERSED_OPERATOR)
   # -Winterference-size was added in GCC 13
   check_cxx_compiler_flag(-Winterference-size HAS_INTERFERENCE_SIZE)
+  check_cxx_compiler_flag(-Warray-bounds HAS_ARRAY_BOUNDS)
   check_cxx_compiler_flag(-Wbitwise-instead-of-logical HAS_BITWISE_INSTEAD_OF_LOGICAL)
   check_cxx_compiler_flag(-Wcast-function-type HAS_CAST_FUNCTION_TYPE)
   check_cxx_compiler_flag(-Wcatch-value HAS_CATCH_VALUE)
diff --git a/cmake/onnxruntime_config.h.in b/cmake/onnxruntime_config.h.in
index f82a23bf4026b..a36f735c507ba 100644
--- a/cmake/onnxruntime_config.h.in
+++ b/cmake/onnxruntime_config.h.in
@@ -3,6 +3,7 @@
 
 #pragma once
 
+#cmakedefine HAS_ARRAY_BOUNDS
 #cmakedefine HAS_BITWISE_INSTEAD_OF_LOGICAL
 #cmakedefine HAS_CAST_FUNCTION_TYPE
 #cmakedefine HAS_CATCH_VALUE
diff --git a/onnxruntime/core/mlas/lib/sqnbitgemm_kernel_avx2_int8_blklen32.h b/onnxruntime/core/mlas/lib/sqnbitgemm_kernel_avx2_int8_blklen32.h
index d2d9886ab61f7..a745dd9f1376d 100644
--- a/onnxruntime/core/mlas/lib/sqnbitgemm_kernel_avx2_int8_blklen32.h
+++ b/onnxruntime/core/mlas/lib/sqnbitgemm_kernel_avx2_int8_blklen32.h
@@ -1660,7 +1660,24 @@ MlasQ4Int8TileGemmKernelBlkLen32Avx2(
 
             if constexpr (NCols4 == 8) {
                 __m128 acc_0 = FoldAccumulators(acc[0], acc[1], acc[2], acc[3]);
+
+                // Clang emits -Warray-bounds (an error under -Werror) for the acc[4]..acc[7] accesses below even when NCols4 is 4 and this `if constexpr (NCols4 == 8)` condition is therefore false:
+                //
+                // In file included from .../onnxruntime/core/mlas/lib/sqnbitgemm_kernel_avx2.cpp:26:
+                // .../onnxruntime/core/mlas/lib/sqnbitgemm_kernel_avx2_int8_blklen32.h:1663:49: error: array index 4 is past the end of the array (that has type '__m256[4]') [-Werror,-Warray-bounds]
+                //  1663 |                 __m128 acc_1 = FoldAccumulators(acc[4], acc[5], acc[6], acc[7]);
+                //       |                                                 ^   ~
+                // .../onnxruntime/core/mlas/lib/sqnbitgemm_kernel_avx2_int8_blklen32.h:1531:13: note: array 'acc' declared here
+                //  1531 |             __m256 acc[NCols4];
+                //       |             ^
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Warray-bounds"
+#endif
                 __m128 acc_1 = FoldAccumulators(acc[4], acc[5], acc[6], acc[7]);
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#endif
                 if (BiasPtr != nullptr) {
                     acc_0 = _mm_add_ps(acc_0, _mm_loadu_ps(BiasPtr));
                     acc_1 = _mm_add_ps(acc_1, _mm_loadu_ps(BiasPtr + 4));
