From ad6a187356f10256b6cdd3c8ba374c9c62e8f146 Mon Sep 17 00:00:00 2001
From: KD2YCU <me@kd2ycu.com>
Date: Fri, 3 Apr 2026 14:17:30 -0400
Subject: [PATCH 1/6] PR #3358 fixing get_shape() bounds checking and updating
 per gguf specs where dim = 4

---
 docs/src/usage/saving_and_loading.rst |  7 ++++
 mlx/io/gguf.cpp                       |  8 ++++
 mlx/io/gguf.h                         |  7 ++++
 python/tests/test_load.py             | 37 ++++++++++++++++++
 tests/load_tests.cpp                  | 56 +++++++++++++++++++++++++++
 5 files changed, 115 insertions(+)
diff --git a/docs/src/usage/saving_and_loading.rst b/docs/src/usage/saving_and_loading.rst
index 43f2a79990..d2366e4d61 100644
--- a/docs/src/usage/saving_and_loading.rst
+++ b/docs/src/usage/saving_and_loading.rst
@@ -79,3 +79,10 @@ The functions :func:`save_safetensors` and :func:`save_gguf` are similar to
    >>> a = mx.array([1.0])
    >>> b = mx.array([2.0])
    >>> mx.save_safetensors("arrays", {"a": a, "b": b})
+
+.. note::
+
+   When loading GGUF files from untrusted sources, MLX validates tensor
+   dimensions against the maximum supported by the format (8 dimensions).
+   Files with invalid dimension counts will raise an error rather than
+   risk reading out-of-bounds memory.
diff --git a/mlx/io/gguf.cpp b/mlx/io/gguf.cpp
index 206f6fb31f..cd52ed32d1 100644
--- a/mlx/io/gguf.cpp
+++ b/mlx/io/gguf.cpp
@@ -4,6 +4,7 @@
 #include <cstring>
 #include <fstream>
 #include <numeric>
+#include <sstream>
 
 #include "mlx/io/gguf.h"
 #include "mlx/ops.h"
@@ -48,6 +49,13 @@ std::optional<Dtype> gguf_type_to_dtype(const uint32_t& gguf_type) {
 }
 
 Shape get_shape(const gguf_tensor& tensor) {
+  if (tensor.ndim > MLX_GGUF_MAX_DIMS) {
+    std::ostringstream msg;
+    msg << "[load_gguf] Tensor has " << tensor.ndim
+        << " dimensions, but the maximum supported is " << MLX_GGUF_MAX_DIMS
+        << ". The file may be corrupt or malicious.";
+    throw std::runtime_error(msg.str());
+  }
   Shape shape;
   // The dimension order in GGML is the reverse of the order used in MLX.
   for (int i = tensor.ndim - 1; i >= 0; i--) {
diff --git a/mlx/io/gguf.h b/mlx/io/gguf.h
index fa5bc458de..418c4fbf3d 100644
--- a/mlx/io/gguf.h
+++ b/mlx/io/gguf.h
@@ -10,6 +10,13 @@ extern "C" {
 #include <gguflib.h>
 }
 
+// Maximum number of tensor dimensions supported by the GGUF format.
+// Mirrors GGUF_TENSOR_MAX_DIM from gguflib.h. Override at compile time
+// with -DMLX_GGUF_MAX_DIMS=<value> if the upstream format changes.
+#ifndef MLX_GGUF_MAX_DIMS
+#define MLX_GGUF_MAX_DIMS GGUF_TENSOR_MAX_DIM
+#endif
+
 namespace mlx::core {
 
 Shape get_shape(const gguf_tensor& tensor);
diff --git a/python/tests/test_load.py b/python/tests/test_load.py
index 97fbf6c4c5..b887b5f289 100644
--- a/python/tests/test_load.py
+++ b/python/tests/test_load.py
@@ -300,6 +300,43 @@ def test_save_and_load_gguf_metadata_mixed(self):
             else:
                 self.assertEqual(meta_load_dict[k], v)
 
+    @unittest.skipIf(platform.system() == "Windows", "GGUF is disabled on Windows")
+    def test_gguf_rejects_oversized_ndim(self):
+        """Verify that loading a GGUF file with ndim > 8 raises an error."""
+        import struct
+
+        malicious_ndim = 32
+        tensor_name = b"weight"
+        alignment = 32
+
+        buf = bytearray()
+        # GGUF header (v3)
+        buf += b"GGUF"
+        buf += struct.pack("<I", 3)  # version
+        buf += struct.pack("<Q", 1)  # tensor_count
+        buf += struct.pack("<Q", 0)  # metadata_kv_count
+
+        # Tensor info
+        buf += struct.pack("<Q", len(tensor_name))
+        buf += tensor_name
+        buf += struct.pack("<I", malicious_ndim)  # ndim = 32 (malicious)
+        for _ in range(malicious_ndim):
+            buf += struct.pack("<Q", 1)  # each dim = 1
+        buf += struct.pack("<I", 0)  # type = F32
+        buf += struct.pack("<Q", 0)  # offset = 0
+
+        # Pad to alignment, then append tensor data
+        pad_needed = (alignment - (len(buf) % alignment)) % alignment
+        buf += b"\x00" * pad_needed
+        buf += struct.pack("<f", 1.0)
+
+        bad_file = os.path.join(self.test_dir, "bad_ndim.gguf")
+        with open(bad_file, "wb") as f:
+            f.write(buf)
+
+        with self.assertRaises(RuntimeError):
+            mx.load(bad_file)
+
     def test_save_and_load_fs(self):
         if not os.path.isdir(self.test_dir):
             os.mkdir(self.test_dir)
diff --git a/tests/load_tests.cpp b/tests/load_tests.cpp
index 1531ce060c..1dd7c7a938 100644
--- a/tests/load_tests.cpp
+++ b/tests/load_tests.cpp
@@ -1,6 +1,7 @@
 // Copyright © 2023 Apple Inc.
 
 #include <filesystem>
+#include <fstream>
 #include <stdexcept>
 #include <vector>
 
@@ -201,6 +202,61 @@ TEST_CASE("test gguf metadata") {
   }
 }
 
+TEST_CASE("test gguf rejects oversized ndim") {
+  // Build a minimal GGUF v3 file with ndim=32 (exceeds GGUF_TENSOR_MAX_DIM=8).
+  // Verifies the bounds check in get_shape() catches malicious files.
+  std::string file_path = get_temp_file("test_bad_ndim.gguf");
+
+  constexpr uint32_t malicious_ndim = 32;
+  constexpr const char* tensor_name = "weight";
+  constexpr size_t name_len = 6;
+  constexpr uint32_t alignment = 32;
+
+  auto write_le32 = [](std::ofstream& f, uint32_t v) {
+    char buf[4];
+    for (int i = 0; i < 4; i++)
+      buf[i] = (v >> (i * 8)) & 0xFF;
+    f.write(buf, 4);
+  };
+  auto write_le64 = [](std::ofstream& f, uint64_t v) {
+    char buf[8];
+    for (int i = 0; i < 8; i++)
+      buf[i] = (v >> (i * 8)) & 0xFF;
+    f.write(buf, 8);
+  };
+
+  {
+    std::ofstream f(file_path, std::ios::binary);
+    // GGUF header
+    f.write("GGUF", 4);
+    write_le32(f, 3);              // version
+    write_le64(f, 1);              // tensor_count = 1
+    write_le64(f, 0);              // metadata_kv_count = 0
+
+    // Tensor info
+    write_le64(f, name_len);
+    f.write(tensor_name, name_len);
+    write_le32(f, malicious_ndim); // ndim = 32 (malicious)
+    for (uint32_t i = 0; i < malicious_ndim; i++) {
+      write_le64(f, 1);           // each dim = 1
+    }
+    write_le32(f, 0);             // type = F32
+    write_le64(f, 0);             // offset = 0
+
+    // Pad to alignment, then write tensor data
+    auto pos = f.tellp();
+    size_t data_start =
+        ((size_t)pos + alignment - 1) & ~(size_t)(alignment - 1);
+    for (size_t i = (size_t)pos; i < data_start; i++) {
+      f.put(0);
+    }
+    float one = 1.0f;
+    f.write(reinterpret_cast<const char*>(&one), sizeof(float));
+  }
+
+  CHECK_THROWS_AS(load_gguf(file_path), std::runtime_error);
+}
+
 TEST_CASE("test single array serialization") {
   // Basic test
   {

From e30cf90e96499c9b6a289222c764c0223e6d3fde Mon Sep 17 00:00:00 2001
From: KD2YCU <me@kd2ycu.com>
Date: Fri, 3 Apr 2026 14:23:42 -0400
Subject: [PATCH 2/6] pre-commit run

---
 tests/load_tests.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/load_tests.cpp b/tests/load_tests.cpp
index 1dd7c7a938..172fe02c1e 100644
--- a/tests/load_tests.cpp
+++ b/tests/load_tests.cpp
@@ -229,19 +229,19 @@ TEST_CASE("test gguf rejects oversized ndim") {
     std::ofstream f(file_path, std::ios::binary);
     // GGUF header
     f.write("GGUF", 4);
-    write_le32(f, 3);              // version
-    write_le64(f, 1);              // tensor_count = 1
-    write_le64(f, 0);              // metadata_kv_count = 0
+    write_le32(f, 3); // version
+    write_le64(f, 1); // tensor_count = 1
+    write_le64(f, 0); // metadata_kv_count = 0
 
     // Tensor info
     write_le64(f, name_len);
     f.write(tensor_name, name_len);
     write_le32(f, malicious_ndim); // ndim = 32 (malicious)
     for (uint32_t i = 0; i < malicious_ndim; i++) {
-      write_le64(f, 1);           // each dim = 1
+      write_le64(f, 1); // each dim = 1
     }
-    write_le32(f, 0);             // type = F32
-    write_le64(f, 0);             // offset = 0
+    write_le32(f, 0); // type = F32
+    write_le64(f, 0); // offset = 0
 
     // Pad to alignment, then write tensor data
     auto pos = f.tellp();

From ed8ef656931e918a3c54d2777c6046f36680bf82 Mon Sep 17 00:00:00 2001
From: Dan Anderson <dan@kd2ycu.com>
Date: Fri, 3 Apr 2026 21:32:08 -0400
Subject: [PATCH 3/6] Remove note on GGUF file loading safety

Removed note about GGUF file loading validation.
---
 docs/src/usage/saving_and_loading.rst | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/docs/src/usage/saving_and_loading.rst b/docs/src/usage/saving_and_loading.rst
index d2366e4d61..43f2a79990 100644
--- a/docs/src/usage/saving_and_loading.rst
+++ b/docs/src/usage/saving_and_loading.rst
@@ -79,10 +79,3 @@ The functions :func:`save_safetensors` and :func:`save_gguf` are similar to
    >>> a = mx.array([1.0])
    >>> b = mx.array([2.0])
    >>> mx.save_safetensors("arrays", {"a": a, "b": b})
-
-.. note::
-
-   When loading GGUF files from untrusted sources, MLX validates tensor
-   dimensions against the maximum supported by the format (8 dimensions).
-   Files with invalid dimension counts will raise an error rather than
-   risk reading out-of-bounds memory.

From 6a6f44f965eab03e7744f533de9e6f6be677f8c0 Mon Sep 17 00:00:00 2001
From: KD2YCU <me@kd2ycu.com>
Date: Fri, 3 Apr 2026 21:53:21 -0400
Subject: [PATCH 4/6] Updates per feedback on PR#3359

---
 mlx/io/gguf.cpp           | 13 +++++++------
 mlx/io/gguf.h             |  5 +----
 python/tests/test_load.py | 37 -------------------------------------
 3 files changed, 8 insertions(+), 47 deletions(-)

diff --git a/mlx/io/gguf.cpp b/mlx/io/gguf.cpp
index cd52ed32d1..e8ddbff94c 100644
--- a/mlx/io/gguf.cpp
+++ b/mlx/io/gguf.cpp
@@ -1,10 +1,10 @@
 // Copyright © 2023-2024 Apple Inc.
 
+#include <fmt/format.h>
 #include <cstdint>
 #include <cstring>
 #include <fstream>
 #include <numeric>
-#include <sstream>
 
 #include "mlx/io/gguf.h"
 #include "mlx/ops.h"
@@ -50,11 +50,12 @@ std::optional<Dtype> gguf_type_to_dtype(const uint32_t& gguf_type) {
 
 Shape get_shape(const gguf_tensor& tensor) {
   if (tensor.ndim > MLX_GGUF_MAX_DIMS) {
-    std::ostringstream msg;
-    msg << "[load_gguf] Tensor has " << tensor.ndim
-        << " dimensions, but the maximum supported is " << MLX_GGUF_MAX_DIMS
-        << ". The file may be corrupt or malicious.";
-    throw std::runtime_error(msg.str());
+    throw std::runtime_error(
+        fmt::format(
+            "[load_gguf] Tensor has {} dimensions, but the maximum supported is {}."
+            " The file may be corrupt or malicious.",
+            tensor.ndim,
+            MLX_GGUF_MAX_DIMS));
   }
   Shape shape;
   // The dimension order in GGML is the reverse of the order used in MLX.
diff --git a/mlx/io/gguf.h b/mlx/io/gguf.h
index 418c4fbf3d..71c803b986 100644
--- a/mlx/io/gguf.h
+++ b/mlx/io/gguf.h
@@ -11,11 +11,8 @@ extern "C" {
 }
 
 // Maximum number of tensor dimensions supported by the GGUF format.
-// Mirrors GGUF_TENSOR_MAX_DIM from gguflib.h. Override at compile time
-// with -DMLX_GGUF_MAX_DIMS=<value> if the upstream format changes.
-#ifndef MLX_GGUF_MAX_DIMS
+// Mirrors GGUF_TENSOR_MAX_DIM from gguflib.h.
 #define MLX_GGUF_MAX_DIMS GGUF_TENSOR_MAX_DIM
-#endif
 
 namespace mlx::core {
 
diff --git a/python/tests/test_load.py b/python/tests/test_load.py
index b887b5f289..97fbf6c4c5 100644
--- a/python/tests/test_load.py
+++ b/python/tests/test_load.py
@@ -300,43 +300,6 @@ def test_save_and_load_gguf_metadata_mixed(self):
             else:
                 self.assertEqual(meta_load_dict[k], v)
 
-    @unittest.skipIf(platform.system() == "Windows", "GGUF is disabled on Windows")
-    def test_gguf_rejects_oversized_ndim(self):
-        """Verify that loading a GGUF file with ndim > 8 raises an error."""
-        import struct
-
-        malicious_ndim = 32
-        tensor_name = b"weight"
-        alignment = 32
-
-        buf = bytearray()
-        # GGUF header (v3)
-        buf += b"GGUF"
-        buf += struct.pack("<I", 3)  # version
-        buf += struct.pack("<Q", 1)  # tensor_count
-        buf += struct.pack("<Q", 0)  # metadata_kv_count
-
-        # Tensor info
-        buf += struct.pack("<Q", len(tensor_name))
-        buf += tensor_name
-        buf += struct.pack("<I", malicious_ndim)  # ndim = 32 (malicious)
-        for _ in range(malicious_ndim):
-            buf += struct.pack("<Q", 1)  # each dim = 1
-        buf += struct.pack("<I", 0)  # type = F32
-        buf += struct.pack("<Q", 0)  # offset = 0
-
-        # Pad to alignment, then append tensor data
-        pad_needed = (alignment - (len(buf) % alignment)) % alignment
-        buf += b"\x00" * pad_needed
-        buf += struct.pack("<f", 1.0)
-
-        bad_file = os.path.join(self.test_dir, "bad_ndim.gguf")
-        with open(bad_file, "wb") as f:
-            f.write(buf)
-
-        with self.assertRaises(RuntimeError):
-            mx.load(bad_file)
-
     def test_save_and_load_fs(self):
         if not os.path.isdir(self.test_dir):
             os.mkdir(self.test_dir)

From c45b7c37774f369e8dd15b1a778549e43d2464bf Mon Sep 17 00:00:00 2001
From: Cheng <git@zcbenz.com>
Date: Sat, 4 Apr 2026 15:21:32 +0900
Subject: [PATCH 5/6] Remove MLX_GGUF_MAX_DIMS

---
 mlx/io/gguf.cpp | 7 ++++---
 mlx/io/gguf.h   | 4 ----
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/mlx/io/gguf.cpp b/mlx/io/gguf.cpp
index e8ddbff94c..c4dd93651b 100644
--- a/mlx/io/gguf.cpp
+++ b/mlx/io/gguf.cpp
@@ -1,11 +1,12 @@
 // Copyright © 2023-2024 Apple Inc.
 
-#include <fmt/format.h>
 #include <cstdint>
 #include <cstring>
 #include <fstream>
 #include <numeric>
 
+#include <fmt/format.h>
+
 #include "mlx/io/gguf.h"
 #include "mlx/ops.h"
 
@@ -49,13 +50,13 @@ std::optional<Dtype> gguf_type_to_dtype(const uint32_t& gguf_type) {
 }
 
 Shape get_shape(const gguf_tensor& tensor) {
-  if (tensor.ndim > MLX_GGUF_MAX_DIMS) {
+  if (tensor.ndim > GGUF_TENSOR_MAX_DIM) {
     throw std::runtime_error(
         fmt::format(
             "[load_gguf] Tensor has {} dimensions, but the maximum supported is {}."
             " The file may be corrupt or malicious.",
             tensor.ndim,
-            MLX_GGUF_MAX_DIMS));
+            GGUF_TENSOR_MAX_DIM));
   }
   Shape shape;
   // The dimension order in GGML is the reverse of the order used in MLX.
diff --git a/mlx/io/gguf.h b/mlx/io/gguf.h
index 71c803b986..fa5bc458de 100644
--- a/mlx/io/gguf.h
+++ b/mlx/io/gguf.h
@@ -10,10 +10,6 @@ extern "C" {
 #include <gguflib.h>
 }
 
-// Maximum number of tensor dimensions supported by the GGUF format.
-// Mirrors GGUF_TENSOR_MAX_DIM from gguflib.h.
-#define MLX_GGUF_MAX_DIMS GGUF_TENSOR_MAX_DIM
-
 namespace mlx::core {
 
 Shape get_shape(const gguf_tensor& tensor);

From 09c0be884de373e5e3d497d4aa3c67345f55448d Mon Sep 17 00:00:00 2001
From: KD2YCU <me@kd2ycu.com>
Date: Tue, 7 Apr 2026 22:24:12 -0400
Subject: [PATCH 6/6] Define NDEBUG for gguflib to disable assert() on
 untrusted input

gguflib uses assert() to validate tensor.ndim, which is compiled out
in release builds (NDEBUG) but fires in debug/CI builds, aborting the
process before get_shape()'s runtime bounds check can run. Define
NDEBUG for the gguflib static library so the proper runtime check in
get_shape() is the sole guard against malicious ndim values.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 mlx/io/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mlx/io/CMakeLists.txt b/mlx/io/CMakeLists.txt
index eebd7618b3..59e3155278 100644
--- a/mlx/io/CMakeLists.txt
+++ b/mlx/io/CMakeLists.txt
@@ -17,6 +17,7 @@ if(MLX_BUILD_GGUF)
                              PRIVATE $<BUILD_INTERFACE:${gguflib_SOURCE_DIR}>)
   add_library(gguflib STATIC ${gguflib_SOURCE_DIR}/fp16.c
                              ${gguflib_SOURCE_DIR}/gguflib.c)
+  target_compile_definitions(gguflib PRIVATE NDEBUG)
   target_link_libraries(mlx PRIVATE $<BUILD_INTERFACE:gguflib>)
   target_sources(mlx PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/gguf.cpp
                              ${CMAKE_CURRENT_SOURCE_DIR}/gguf_quants.cpp)