diff --git a/changes/3908.misc.md b/changes/3908.misc.md new file mode 100644 index 0000000000..66717e8444 --- /dev/null +++ b/changes/3908.misc.md @@ -0,0 +1 @@ +Reuse a constant `ArraySpec` during indexing when possible. \ No newline at end of file diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index f0cd5dd734..97eb3f6a30 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -5366,6 +5366,37 @@ def _get_chunk_spec( ) +def _get_default_chunk_spec( + metadata: ArrayMetadata, + chunk_grid: ChunkGrid, + array_config: ArrayConfig, + prototype: BufferPrototype, +) -> ArraySpec | None: + """Build an ArraySpec for the regular (non-edge) chunk shape, or None if not regular. + + For regular grids, all chunks have the same codec_shape, so we can + build the ArraySpec once and reuse it for every chunk — avoiding the + per-chunk ChunkGrid.__getitem__ + ArraySpec construction overhead. + + .. note:: + Ideally the per-chunk ArraySpec would not exist at all: dtype, + fill_value, config, and prototype are constant across chunks — + only the shape varies (and only for edge chunks). A cleaner + design would pass a single ArraySpec plus a per-chunk shape + override, which ChunkTransform.decode_chunk already supports + via its ``chunk_shape`` parameter. + """ + if chunk_grid.is_regular: + return ArraySpec( + shape=chunk_grid.chunk_shape, + dtype=metadata.dtype, + fill_value=metadata.fill_value, + config=array_config, + prototype=prototype, + ) + return None + + async def _get_selection( store_path: StorePath, metadata: ArrayMetadata, @@ -5445,11 +5476,16 @@ async def _get_selection( # reading chunks and decoding them indexed_chunks = list(indexer) + # Pre-compute the default chunk spec for regular grids to avoid + # per-chunk ChunkGrid lookups and ArraySpec construction. + default_spec = _get_default_chunk_spec(metadata, chunk_grid, _config, prototype) results = await codec_pipeline.read( [ ( store_path / metadata.encode_chunk_key(chunk_coords), - _get_chunk_spec(metadata, chunk_grid, chunk_coords, _config, prototype), + default_spec + if default_spec is not None + else _get_chunk_spec(metadata, chunk_grid, chunk_coords, _config, prototype), chunk_selection, out_selection, is_complete_chunk, @@ -5788,11 +5824,14 @@ async def _set_selection( _config = replace(_config, order=order) # merging with existing data and encoding chunks + default_spec = _get_default_chunk_spec(metadata, chunk_grid, _config, prototype) await codec_pipeline.write( [ ( store_path / metadata.encode_chunk_key(chunk_coords), - _get_chunk_spec(metadata, chunk_grid, chunk_coords, _config, prototype), + default_spec + if default_spec is not None + else _get_chunk_spec(metadata, chunk_grid, chunk_coords, _config, prototype), chunk_selection, out_selection, is_complete_chunk,