From ad6a187356f10256b6cdd3c8ba374c9c62e8f146 Mon Sep 17 00:00:00 2001 From: KD2YCU Date: Fri, 3 Apr 2026 14:17:30 -0400 Subject: [PATCH 1/6] PR #3358 fixing get_shape() bounds checking and updating per gguf specs where dim = 4 --- docs/src/usage/saving_and_loading.rst | 7 ++++ mlx/io/gguf.cpp | 8 ++++ mlx/io/gguf.h | 7 ++++ python/tests/test_load.py | 37 ++++++++++++++++++ tests/load_tests.cpp | 56 +++++++++++++++++++++++++++ 5 files changed, 115 insertions(+) diff --git a/docs/src/usage/saving_and_loading.rst b/docs/src/usage/saving_and_loading.rst index 43f2a79990..d2366e4d61 100644 --- a/docs/src/usage/saving_and_loading.rst +++ b/docs/src/usage/saving_and_loading.rst @@ -79,3 +79,10 @@ The functions :func:`save_safetensors` and :func:`save_gguf` are similar to >>> a = mx.array([1.0]) >>> b = mx.array([2.0]) >>> mx.save_safetensors("arrays", {"a": a, "b": b}) + +.. note:: + + When loading GGUF files from untrusted sources, MLX validates tensor + dimensions against the maximum supported by the format (8 dimensions). + Files with invalid dimension counts will raise an error rather than + risk reading out-of-bounds memory. diff --git a/mlx/io/gguf.cpp b/mlx/io/gguf.cpp index 206f6fb31f..cd52ed32d1 100644 --- a/mlx/io/gguf.cpp +++ b/mlx/io/gguf.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include "mlx/io/gguf.h" #include "mlx/ops.h" @@ -48,6 +49,13 @@ std::optional gguf_type_to_dtype(const uint32_t& gguf_type) { } Shape get_shape(const gguf_tensor& tensor) { + if (tensor.ndim > MLX_GGUF_MAX_DIMS) { + std::ostringstream msg; + msg << "[load_gguf] Tensor has " << tensor.ndim + << " dimensions, but the maximum supported is " << MLX_GGUF_MAX_DIMS + << ". The file may be corrupt or malicious."; + throw std::runtime_error(msg.str()); + } Shape shape; // The dimension order in GGML is the reverse of the order used in MLX. for (int i = tensor.ndim - 1; i >= 0; i--) { diff --git a/mlx/io/gguf.h b/mlx/io/gguf.h index fa5bc458de..418c4fbf3d 100644 --- a/mlx/io/gguf.h +++ b/mlx/io/gguf.h @@ -10,6 +10,13 @@ extern "C" { #include } +// Maximum number of tensor dimensions supported by the GGUF format. +// Mirrors GGUF_TENSOR_MAX_DIM from gguflib.h. Override at compile time +// with -DMLX_GGUF_MAX_DIMS= if the upstream format changes. +#ifndef MLX_GGUF_MAX_DIMS +#define MLX_GGUF_MAX_DIMS GGUF_TENSOR_MAX_DIM +#endif + namespace mlx::core { Shape get_shape(const gguf_tensor& tensor); diff --git a/python/tests/test_load.py b/python/tests/test_load.py index 97fbf6c4c5..b887b5f289 100644 --- a/python/tests/test_load.py +++ b/python/tests/test_load.py @@ -300,6 +300,43 @@ def test_save_and_load_gguf_metadata_mixed(self): else: self.assertEqual(meta_load_dict[k], v) + @unittest.skipIf(platform.system() == "Windows", "GGUF is disabled on Windows") + def test_gguf_rejects_oversized_ndim(self): + """Verify that loading a GGUF file with ndim > 8 raises an error.""" + import struct + + malicious_ndim = 32 + tensor_name = b"weight" + alignment = 32 + + buf = bytearray() + # GGUF header (v3) + buf += b"GGUF" + buf += struct.pack(" +#include #include #include @@ -201,6 +202,61 @@ TEST_CASE("test gguf metadata") { } } +TEST_CASE("test gguf rejects oversized ndim") { + // Build a minimal GGUF v3 file with ndim=32 (exceeds GGUF_TENSOR_MAX_DIM=8). + // Verifies the bounds check in get_shape() catches malicious files. + std::string file_path = get_temp_file("test_bad_ndim.gguf"); + + constexpr uint32_t malicious_ndim = 32; + constexpr const char* tensor_name = "weight"; + constexpr size_t name_len = 6; + constexpr uint32_t alignment = 32; + + auto write_le32 = [](std::ofstream& f, uint32_t v) { + char buf[4]; + for (int i = 0; i < 4; i++) + buf[i] = (v >> (i * 8)) & 0xFF; + f.write(buf, 4); + }; + auto write_le64 = [](std::ofstream& f, uint64_t v) { + char buf[8]; + for (int i = 0; i < 8; i++) + buf[i] = (v >> (i * 8)) & 0xFF; + f.write(buf, 8); + }; + + { + std::ofstream f(file_path, std::ios::binary); + // GGUF header + f.write("GGUF", 4); + write_le32(f, 3); // version + write_le64(f, 1); // tensor_count = 1 + write_le64(f, 0); // metadata_kv_count = 0 + + // Tensor info + write_le64(f, name_len); + f.write(tensor_name, name_len); + write_le32(f, malicious_ndim); // ndim = 32 (malicious) + for (uint32_t i = 0; i < malicious_ndim; i++) { + write_le64(f, 1); // each dim = 1 + } + write_le32(f, 0); // type = F32 + write_le64(f, 0); // offset = 0 + + // Pad to alignment, then write tensor data + auto pos = f.tellp(); + size_t data_start = + ((size_t)pos + alignment - 1) & ~(size_t)(alignment - 1); + for (size_t i = (size_t)pos; i < data_start; i++) { + f.put(0); + } + float one = 1.0f; + f.write(reinterpret_cast(&one), sizeof(float)); + } + + CHECK_THROWS_AS(load_gguf(file_path), std::runtime_error); +} + TEST_CASE("test single array serialization") { // Basic test { From e30cf90e96499c9b6a289222c764c0223e6d3fde Mon Sep 17 00:00:00 2001 From: KD2YCU Date: Fri, 3 Apr 2026 14:23:42 -0400 Subject: [PATCH 2/6] pre-commit run --- tests/load_tests.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/load_tests.cpp b/tests/load_tests.cpp index 1dd7c7a938..172fe02c1e 100644 --- a/tests/load_tests.cpp +++ b/tests/load_tests.cpp @@ -229,19 +229,19 @@ TEST_CASE("test gguf rejects oversized ndim") { std::ofstream f(file_path, std::ios::binary); // GGUF header f.write("GGUF", 4); - write_le32(f, 3); // version - write_le64(f, 1); // tensor_count = 1 - write_le64(f, 0); // metadata_kv_count = 0 + write_le32(f, 3); // version + write_le64(f, 1); // tensor_count = 1 + write_le64(f, 0); // metadata_kv_count = 0 // Tensor info write_le64(f, name_len); f.write(tensor_name, name_len); write_le32(f, malicious_ndim); // ndim = 32 (malicious) for (uint32_t i = 0; i < malicious_ndim; i++) { - write_le64(f, 1); // each dim = 1 + write_le64(f, 1); // each dim = 1 } - write_le32(f, 0); // type = F32 - write_le64(f, 0); // offset = 0 + write_le32(f, 0); // type = F32 + write_le64(f, 0); // offset = 0 // Pad to alignment, then write tensor data auto pos = f.tellp(); From ed8ef656931e918a3c54d2777c6046f36680bf82 Mon Sep 17 00:00:00 2001 From: Dan Anderson Date: Fri, 3 Apr 2026 21:32:08 -0400 Subject: [PATCH 3/6] Remove note on GGUF file loading safety Removed note about GGUF file loading validation. --- docs/src/usage/saving_and_loading.rst | 7 ------- 1 file changed, 7 deletions(-) diff --git a/docs/src/usage/saving_and_loading.rst b/docs/src/usage/saving_and_loading.rst index d2366e4d61..43f2a79990 100644 --- a/docs/src/usage/saving_and_loading.rst +++ b/docs/src/usage/saving_and_loading.rst @@ -79,10 +79,3 @@ The functions :func:`save_safetensors` and :func:`save_gguf` are similar to >>> a = mx.array([1.0]) >>> b = mx.array([2.0]) >>> mx.save_safetensors("arrays", {"a": a, "b": b}) - -.. note:: - - When loading GGUF files from untrusted sources, MLX validates tensor - dimensions against the maximum supported by the format (8 dimensions). - Files with invalid dimension counts will raise an error rather than - risk reading out-of-bounds memory. From 6a6f44f965eab03e7744f533de9e6f6be677f8c0 Mon Sep 17 00:00:00 2001 From: KD2YCU Date: Fri, 3 Apr 2026 21:53:21 -0400 Subject: [PATCH 4/6] Updates per feedback on PR#3359 --- mlx/io/gguf.cpp | 13 +++++++------ mlx/io/gguf.h | 5 +---- python/tests/test_load.py | 37 ------------------------------------- 3 files changed, 8 insertions(+), 47 deletions(-) diff --git a/mlx/io/gguf.cpp b/mlx/io/gguf.cpp index cd52ed32d1..e8ddbff94c 100644 --- a/mlx/io/gguf.cpp +++ b/mlx/io/gguf.cpp @@ -1,10 +1,10 @@ // Copyright © 2023-2024 Apple Inc. +#include #include #include #include #include -#include #include "mlx/io/gguf.h" #include "mlx/ops.h" @@ -50,11 +50,12 @@ std::optional gguf_type_to_dtype(const uint32_t& gguf_type) { Shape get_shape(const gguf_tensor& tensor) { if (tensor.ndim > MLX_GGUF_MAX_DIMS) { - std::ostringstream msg; - msg << "[load_gguf] Tensor has " << tensor.ndim - << " dimensions, but the maximum supported is " << MLX_GGUF_MAX_DIMS - << ". The file may be corrupt or malicious."; - throw std::runtime_error(msg.str()); + throw std::runtime_error( + fmt::format( + "[load_gguf] Tensor has {} dimensions, but the maximum supported is {}." + " The file may be corrupt or malicious.", + tensor.ndim, + MLX_GGUF_MAX_DIMS)); } Shape shape; // The dimension order in GGML is the reverse of the order used in MLX. diff --git a/mlx/io/gguf.h b/mlx/io/gguf.h index 418c4fbf3d..71c803b986 100644 --- a/mlx/io/gguf.h +++ b/mlx/io/gguf.h @@ -11,11 +11,8 @@ extern "C" { } // Maximum number of tensor dimensions supported by the GGUF format. -// Mirrors GGUF_TENSOR_MAX_DIM from gguflib.h. Override at compile time -// with -DMLX_GGUF_MAX_DIMS= if the upstream format changes. -#ifndef MLX_GGUF_MAX_DIMS +// Mirrors GGUF_TENSOR_MAX_DIM from gguflib.h. #define MLX_GGUF_MAX_DIMS GGUF_TENSOR_MAX_DIM -#endif namespace mlx::core { diff --git a/python/tests/test_load.py b/python/tests/test_load.py index b887b5f289..97fbf6c4c5 100644 --- a/python/tests/test_load.py +++ b/python/tests/test_load.py @@ -300,43 +300,6 @@ def test_save_and_load_gguf_metadata_mixed(self): else: self.assertEqual(meta_load_dict[k], v) - @unittest.skipIf(platform.system() == "Windows", "GGUF is disabled on Windows") - def test_gguf_rejects_oversized_ndim(self): - """Verify that loading a GGUF file with ndim > 8 raises an error.""" - import struct - - malicious_ndim = 32 - tensor_name = b"weight" - alignment = 32 - - buf = bytearray() - # GGUF header (v3) - buf += b"GGUF" - buf += struct.pack(" Date: Sat, 4 Apr 2026 15:21:32 +0900 Subject: [PATCH 5/6] Remove MLX_GGUF_MAX_DIMS --- mlx/io/gguf.cpp | 7 ++++--- mlx/io/gguf.h | 4 ---- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/mlx/io/gguf.cpp b/mlx/io/gguf.cpp index e8ddbff94c..c4dd93651b 100644 --- a/mlx/io/gguf.cpp +++ b/mlx/io/gguf.cpp @@ -1,11 +1,12 @@ // Copyright © 2023-2024 Apple Inc. -#include #include #include #include #include +#include + #include "mlx/io/gguf.h" #include "mlx/ops.h" @@ -49,13 +50,13 @@ std::optional gguf_type_to_dtype(const uint32_t& gguf_type) { } Shape get_shape(const gguf_tensor& tensor) { - if (tensor.ndim > MLX_GGUF_MAX_DIMS) { + if (tensor.ndim > GGUF_TENSOR_MAX_DIM) { throw std::runtime_error( fmt::format( "[load_gguf] Tensor has {} dimensions, but the maximum supported is {}." " The file may be corrupt or malicious.", tensor.ndim, - MLX_GGUF_MAX_DIMS)); + GGUF_TENSOR_MAX_DIM)); } Shape shape; // The dimension order in GGML is the reverse of the order used in MLX. diff --git a/mlx/io/gguf.h b/mlx/io/gguf.h index 71c803b986..fa5bc458de 100644 --- a/mlx/io/gguf.h +++ b/mlx/io/gguf.h @@ -10,10 +10,6 @@ extern "C" { #include } -// Maximum number of tensor dimensions supported by the GGUF format. -// Mirrors GGUF_TENSOR_MAX_DIM from gguflib.h. -#define MLX_GGUF_MAX_DIMS GGUF_TENSOR_MAX_DIM - namespace mlx::core { Shape get_shape(const gguf_tensor& tensor); From 09c0be884de373e5e3d497d4aa3c67345f55448d Mon Sep 17 00:00:00 2001 From: KD2YCU Date: Tue, 7 Apr 2026 22:24:12 -0400 Subject: [PATCH 6/6] Define NDEBUG for gguflib to disable assert() on untrusted input gguflib uses assert() to validate tensor.ndim, which is compiled out in release builds (NDEBUG) but fires in debug/CI builds, aborting the process before get_shape()'s runtime bounds check can run. Define NDEBUG for the gguflib static library so the proper runtime check in get_shape() is the sole guard against malicious ndim values. Co-Authored-By: Claude Opus 4.6 (1M context) --- mlx/io/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/mlx/io/CMakeLists.txt b/mlx/io/CMakeLists.txt index eebd7618b3..59e3155278 100644 --- a/mlx/io/CMakeLists.txt +++ b/mlx/io/CMakeLists.txt @@ -17,6 +17,7 @@ if(MLX_BUILD_GGUF) PRIVATE $) add_library(gguflib STATIC ${gguflib_SOURCE_DIR}/fp16.c ${gguflib_SOURCE_DIR}/gguflib.c) + target_compile_definitions(gguflib PRIVATE NDEBUG) target_link_libraries(mlx PRIVATE $) target_sources(mlx PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/gguf.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gguf_quants.cpp)