From 9f8bce722e243a1a492a17d06bbdf55c64cafb8e Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 15 Apr 2026 09:02:13 +0200 Subject: [PATCH 1/2] perf: cache default ArraySpec for regular chunk grids MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For regular grids, all chunks have the same codec_shape, so we can build the ArraySpec once and reuse it for every chunk — avoiding the per-chunk ChunkGrid.__getitem__ + ArraySpec construction overhead. Adds _get_default_chunk_spec() and uses it in _get_selection and _set_selection. Saves ~5ms per 1000 chunks. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/zarr/core/array.py | 43 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 4736805b9d..0f6531fdcc 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -5778,6 +5778,37 @@ def _get_chunk_spec( ) +def _get_default_chunk_spec( + metadata: ArrayMetadata, + chunk_grid: ChunkGrid, + array_config: ArrayConfig, + prototype: BufferPrototype, +) -> ArraySpec | None: + """Build an ArraySpec for the regular (non-edge) chunk shape, or None if not regular. + + For regular grids, all chunks have the same codec_shape, so we can + build the ArraySpec once and reuse it for every chunk — avoiding the + per-chunk ChunkGrid.__getitem__ + ArraySpec construction overhead. + + .. note:: + Ideally the per-chunk ArraySpec would not exist at all: dtype, + fill_value, config, and prototype are constant across chunks — + only the shape varies (and only for edge chunks). A cleaner + design would pass a single ArraySpec plus a per-chunk shape + override, which ChunkTransform.decode_chunk already supports + via its ``chunk_shape`` parameter. + """ + if chunk_grid.is_regular: + return ArraySpec( + shape=chunk_grid.chunk_shape, + dtype=metadata.dtype, + fill_value=metadata.fill_value, + config=array_config, + prototype=prototype, + ) + return None + + async def _get_selection( store_path: StorePath, metadata: ArrayMetadata, @@ -5857,11 +5888,16 @@ async def _get_selection( # reading chunks and decoding them indexed_chunks = list(indexer) + # Pre-compute the default chunk spec for regular grids to avoid + # per-chunk ChunkGrid lookups and ArraySpec construction. + default_spec = _get_default_chunk_spec(metadata, chunk_grid, _config, prototype) results = await codec_pipeline.read( [ ( store_path / metadata.encode_chunk_key(chunk_coords), - _get_chunk_spec(metadata, chunk_grid, chunk_coords, _config, prototype), + default_spec + if default_spec is not None + else _get_chunk_spec(metadata, chunk_grid, chunk_coords, _config, prototype), chunk_selection, out_selection, is_complete_chunk, @@ -6200,11 +6236,14 @@ async def _set_selection( _config = replace(_config, order=order) # merging with existing data and encoding chunks + default_spec = _get_default_chunk_spec(metadata, chunk_grid, _config, prototype) await codec_pipeline.write( [ ( store_path / metadata.encode_chunk_key(chunk_coords), - _get_chunk_spec(metadata, chunk_grid, chunk_coords, _config, prototype), + default_spec + if default_spec is not None + else _get_chunk_spec(metadata, chunk_grid, chunk_coords, _config, prototype), chunk_selection, out_selection, is_complete_chunk, From 0cb71977c3957b05e7a4d14bfc3522f5c2db047e Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 15 Apr 2026 11:26:35 +0200 Subject: [PATCH 2/2] docs: changelog --- changes/3908.misc.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/3908.misc.md diff --git a/changes/3908.misc.md b/changes/3908.misc.md new file mode 100644 index 0000000000..66717e8444 --- /dev/null +++ b/changes/3908.misc.md @@ -0,0 +1 @@ +Reuse a constant `ArraySpec` during indexing when possible. \ No newline at end of file