From 73d9894dfd99e418095cdd32cddaccd2b563fa48 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 5 Feb 2026 10:03:25 -0600 Subject: [PATCH 1/2] fix: Handle inhomogeneous array shapes in to_arrays() When arrays have different shapes, np.array(values, dtype=object) can still fail due to numpy's broadcasting behavior. Instead, create an empty object array and assign elements individually. This matches v0.14.7 behavior where blobs were extracted from structured arrays with object dtype columns. Fixes #1380 Co-Authored-By: Claude Opus 4.5 --- src/datajoint/expression.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/datajoint/expression.py b/src/datajoint/expression.py index a9a7ddfe7..3f75edeec 100644 --- a/src/datajoint/expression.py +++ b/src/datajoint/expression.py @@ -860,7 +860,10 @@ def to_arrays(self, *attrs, include_key=False, order_by=None, limit=None, offset arr = np.array(values) except ValueError: # Variable-size data (e.g., arrays of different shapes) - arr = np.array(values, dtype=object) + # Must assign individually to avoid numpy broadcasting issues + arr = np.empty(len(values), dtype=object) + for i, v in enumerate(values): + arr[i] = v result_arrays.append(arr) if include_key: From 328e99d52d332e011d297d2fdc8f48f14a1629ea Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 5 Feb 2026 10:07:55 -0600 Subject: [PATCH 2/2] test: Add tests for inhomogeneous array shapes in to_arrays() Regression tests for #1380 - verify that arrays of different shapes are handled correctly without ValueError. 
Co-Authored-By: Claude Opus 4.5
---
 tests/integration/test_fetch.py | 66 +++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/tests/integration/test_fetch.py b/tests/integration/test_fetch.py
index 8cde34deb..dd556ff70 100644
--- a/tests/integration/test_fetch.py
+++ b/tests/integration/test_fetch.py
@@ -397,3 +397,69 @@ def test_to_arrays_without_include_key(lang):
     names, langs = result
     assert isinstance(names, np.ndarray)
     assert isinstance(langs, np.ndarray)
+
+
+def test_to_arrays_inhomogeneous_shapes(schema_any):
+    """Test to_arrays handles arrays of different shapes correctly.
+
+    Regression test for https://github.com/datajoint/datajoint-python/issues/1380
+    """
+    table = schema.Longblob()
+    table.delete()
+
+    # Insert arrays with different shapes that numpy would try to broadcast
+    arrays = [
+        np.random.randn(100),  # shape (100,)
+        np.random.randn(100, 1),  # shape (100, 1)
+        np.random.randn(100, 2),  # shape (100, 2)
+    ]
+    table.insert([{"id": i, "data": a} for i, a in enumerate(arrays)])
+
+    # This should not raise ValueError
+    data = table.to_arrays("data", order_by="id")
+
+    # Should return object array with 3 elements
+    assert data.dtype == object
+    assert len(data) == 3
+
+    # Each element should preserve its original shape
+    assert data[0].shape == (100,)
+    assert data[1].shape == (100, 1)
+    assert data[2].shape == (100, 2)
+
+    # ...and its original values, in the requested order
+    for fetched, expected in zip(data, arrays):
+        np.testing.assert_array_equal(fetched, expected)
+
+
+def test_to_arrays_inhomogeneous_shapes_second_axis(schema_any):
+    """Test to_arrays handles arrays differing on second axis.
+
+    Regression test for https://github.com/datajoint/datajoint-python/issues/1380
+    """
+    table = schema.Longblob()
+    table.delete()
+
+    # Insert arrays with different shapes on second axis
+    arrays = [
+        np.random.randn(100),  # shape (100,)
+        np.random.randn(1, 100),  # shape (1, 100)
+        np.random.randn(2, 100),  # shape (2, 100)
+    ]
+    table.insert([{"id": i, "data": a} for i, a in enumerate(arrays)])
+
+    # This should not raise ValueError
+    data = table.to_arrays("data", order_by="id")
+
+    # Should return object array with 3 elements
+    assert data.dtype == object
+    assert len(data) == 3
+
+    # Each element should preserve its original shape
+    assert data[0].shape == (100,)
+    assert data[1].shape == (1, 100)
+    assert data[2].shape == (2, 100)
+
+    # ...and its original values, in the requested order
+    for fetched, expected in zip(data, arrays):
+        np.testing.assert_array_equal(fetched, expected)