Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/3908.misc.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Reuse a constant `ArraySpec` during indexing when possible.
43 changes: 41 additions & 2 deletions src/zarr/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -5366,6 +5366,37 @@ def _get_chunk_spec(
)


def _get_default_chunk_spec(
    metadata: ArrayMetadata,
    chunk_grid: ChunkGrid,
    array_config: ArrayConfig,
    prototype: BufferPrototype,
) -> ArraySpec | None:
    """Return a single reusable ``ArraySpec`` for a regular chunk grid.

    On a regular grid every chunk shares the same codec shape, so one
    ``ArraySpec`` can be built up front and handed to each chunk instead
    of paying the per-chunk ``ChunkGrid.__getitem__`` + ``ArraySpec``
    construction cost. Returns ``None`` when the grid is not regular and
    callers must fall back to a per-chunk lookup.

    .. note::
        Ideally the per-chunk ArraySpec would not exist at all: dtype,
        fill_value, config, and prototype are constant across chunks —
        only the shape varies (and only for edge chunks). A cleaner
        design would pass a single ArraySpec plus a per-chunk shape
        override, which ChunkTransform.decode_chunk already supports
        via its ``chunk_shape`` parameter.
    """
    # Irregular grids have chunk-dependent shapes; no shared spec exists.
    if not chunk_grid.is_regular:
        return None
    return ArraySpec(
        shape=chunk_grid.chunk_shape,
        dtype=metadata.dtype,
        fill_value=metadata.fill_value,
        config=array_config,
        prototype=prototype,
    )


async def _get_selection(
store_path: StorePath,
metadata: ArrayMetadata,
Expand Down Expand Up @@ -5445,11 +5476,16 @@ async def _get_selection(

# reading chunks and decoding them
indexed_chunks = list(indexer)
# Pre-compute the default chunk spec for regular grids to avoid
# per-chunk ChunkGrid lookups and ArraySpec construction.
default_spec = _get_default_chunk_spec(metadata, chunk_grid, _config, prototype)
results = await codec_pipeline.read(
[
(
store_path / metadata.encode_chunk_key(chunk_coords),
_get_chunk_spec(metadata, chunk_grid, chunk_coords, _config, prototype),
default_spec
if default_spec is not None
else _get_chunk_spec(metadata, chunk_grid, chunk_coords, _config, prototype),
chunk_selection,
out_selection,
is_complete_chunk,
Expand Down Expand Up @@ -5788,11 +5824,14 @@ async def _set_selection(
_config = replace(_config, order=order)

# merging with existing data and encoding chunks
default_spec = _get_default_chunk_spec(metadata, chunk_grid, _config, prototype)
await codec_pipeline.write(
[
(
store_path / metadata.encode_chunk_key(chunk_coords),
_get_chunk_spec(metadata, chunk_grid, chunk_coords, _config, prototype),
default_spec
if default_spec is not None
else _get_chunk_spec(metadata, chunk_grid, chunk_coords, _config, prototype),
chunk_selection,
out_selection,
is_complete_chunk,
Expand Down
Loading