From cd396abed1449b53392b38365cc6632982242b13 Mon Sep 17 00:00:00 2001 From: Jerome Kelleher Date: Wed, 4 Mar 2026 21:16:14 +0000 Subject: [PATCH 1/3] Support Zarr 2 and 3 (again) --- pyproject.toml | 2 +- tests/test_cli.py | 6 ++--- tests/test_compression.py | 18 +++++++-------- tszip/_zarr_compat.py | 47 +++++++++++++++++++++++++++++++++++++++ tszip/compression.py | 15 ++++++------- uv.lock | 2 +- 6 files changed, 68 insertions(+), 22 deletions(-) create mode 100644 tszip/_zarr_compat.py diff --git a/pyproject.toml b/pyproject.toml index 4c07c66..c7d258a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ dependencies = [ "numpy", "humanize", "tskit>=1.0.0", - "zarr>=3.1", + "zarr>=2.18", ] dynamic = ["version"] diff --git a/tests/test_cli.py b/tests/test_cli.py index 1ffcaa4..efa9ebf 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -32,10 +32,10 @@ import numpy as np import pytest import tskit -import zarr import tszip import tszip.cli as cli +from tszip import _zarr_compat def get_stdout_for_pytest(): @@ -265,8 +265,8 @@ def test_chunk_size(self): assert outpath.exists() ts = tszip.decompress(outpath) assert ts.tables == self.ts.tables - store = zarr.storage.ZipStore(str(outpath), mode="r") - root = zarr.open_group(store=store, zarr_format=2, mode="r") + store = _zarr_compat.open_zip_store(outpath, mode="r") + root = _zarr_compat.open_group_for_read(store) for _, g in root.groups(): for _, a in g.arrays(): assert a.chunks == (20,) diff --git a/tests/test_compression.py b/tests/test_compression.py index 88c6441..2541fef 100644 --- a/tests/test_compression.py +++ b/tests/test_compression.py @@ -30,12 +30,12 @@ import numpy as np import pytest import tskit -import zarr import tszip import tszip.compression as compression import tszip.exceptions as exceptions import tszip.provenance as provenance +from tszip import _zarr_compat class TestMinimalDtype: @@ -295,8 +295,8 @@ def setup(self, tmp_path): def test_format_written(self): ts = 
msprime.simulate(10, random_seed=1) tszip.compress(ts, self.path) - with zarr.storage.ZipStore(str(self.path), mode="r") as store: - root = zarr.open_group(store=store, zarr_format=2, mode="r") + with _zarr_compat.open_zip_store(self.path, mode="r") as store: + root = _zarr_compat.open_group_for_read(store) assert root.attrs["format_name"] == compression.FORMAT_NAME assert root.attrs["format_version"] == compression.FORMAT_VERSION @@ -304,8 +304,8 @@ def test_provenance(self): ts = msprime.simulate(10, random_seed=1) for variants_only in [True, False]: tszip.compress(ts, self.path, variants_only=variants_only) - with zarr.storage.ZipStore(str(self.path), mode="r") as store: - root = zarr.open_group(store=store, zarr_format=2, mode="r") + with _zarr_compat.open_zip_store(self.path, mode="r") as store: + root = _zarr_compat.open_group_for_read(store) assert root.attrs["provenance"] == provenance.get_provenance_dict( { "variants_only": variants_only, @@ -314,8 +314,8 @@ def test_provenance(self): ) def write_file(self, attrs, path): - with zarr.storage.ZipStore(str(path), mode="w") as store: - root = zarr.open_group(store=store, zarr_format=2, mode="a") + with _zarr_compat.open_zip_store(path, mode="w") as store: + root = _zarr_compat.open_group_for_write(store) root.attrs.update(attrs) def test_missing_format_keys(self): @@ -538,8 +538,8 @@ def test_good_chunks(self, tmpdir, chunk_size): ts2 = tszip.decompress(path) assert ts1 == ts2 - store = zarr.storage.ZipStore(str(path), mode="r") - root = zarr.open_group(store=store, zarr_format=2, mode="r") + store = _zarr_compat.open_zip_store(path, mode="r") + root = _zarr_compat.open_group_for_read(store) for _, g in root.groups(): for _, a in g.arrays(): assert a.chunks == (chunk_size,) diff --git a/tszip/_zarr_compat.py b/tszip/_zarr_compat.py new file mode 100644 index 0000000..b3cb870 --- /dev/null +++ b/tszip/_zarr_compat.py @@ -0,0 +1,47 @@ +import zarr + +_ZARR_V3 = int(zarr.__version__.split(".")[0]) >= 3 + + +def 
open_zip_store(path, mode): + """Open a ZipStore compatible with zarr v2 and v3.""" + return zarr.storage.ZipStore(str(path), mode=mode) + + +def open_group_for_read(store): + """Open a zarr group for reading in zarr v2 format.""" + if _ZARR_V3: + return zarr.open_group(store=store, zarr_format=2, mode="r") + else: + return zarr.open_group(store=store, mode="r") + + +def open_group_for_write(store): + """Open a zarr group for writing in zarr v2 format.""" + if _ZARR_V3: + return zarr.open_group(store=store, zarr_format=2, mode="a") + else: + return zarr.open_group(store=store, mode="a") + + +def empty_array(root, name, shape, dtype, chunks, filters, compressor): + """Create an empty zarr array in zarr v2 format.""" + if _ZARR_V3: + return root.empty( + name=name, + shape=shape, + dtype=dtype, + chunks=chunks, + zarr_format=2, + filters=filters, + compressor=compressor, + ) + else: + return root.empty( + name=name, + shape=shape, + dtype=dtype, + chunks=chunks, + filters=filters, + compressor=compressor, + ) diff --git a/tszip/compression.py b/tszip/compression.py index c344172..7ee1606 100644 --- a/tszip/compression.py +++ b/tszip/compression.py @@ -39,9 +39,8 @@ import numpy as np import tskit import zarr -from zarr.storage import ZipStore -from . import exceptions, provenance +from . 
import _zarr_compat, exceptions, provenance logger = logging.getLogger(__name__) @@ -106,8 +105,8 @@ def compress(ts, destination, variants_only=False, *, chunk_size=None): with tempfile.TemporaryDirectory(dir=destdir, prefix=".tszip_work_") as tmpdir: filename = pathlib.Path(tmpdir, "tmp.trees.tgz") logging.debug(f"Writing to temporary file {filename}") - with ZipStore(filename, mode="w") as store: - root = zarr.open_group(store=store, zarr_format=2, mode="a") + with _zarr_compat.open_zip_store(filename, mode="w") as store: + root = _zarr_compat.open_group_for_write(store) compress_zarr(ts, root, variants_only=variants_only, chunk_size=chunk_size) if is_path: os.replace(filename, destination) @@ -151,12 +150,12 @@ def compress(self, root, compressor): filters = None if self.delta_filter: filters = [numcodecs.Delta(dtype=dtype)] - compressed_array = root.empty( + compressed_array = _zarr_compat.empty_array( + root, name=self.name, shape=shape, dtype=dtype, chunks=self.chunks, - zarr_format=2, filters=filters, compressor=compressor, ) @@ -296,8 +295,8 @@ def check_format(root): def load_zarr(path): path = str(path) try: - store = ZipStore(path, mode="r") - root = zarr.open_group(store=store, zarr_format=2, mode="r") + store = _zarr_compat.open_zip_store(path, mode="r") + root = _zarr_compat.open_group_for_read(store) except zipfile.BadZipFile as bzf: raise exceptions.FileFormatError("File is not in tszip format") from bzf diff --git a/uv.lock b/uv.lock index 79bfcf7..8812557 100644 --- a/uv.lock +++ b/uv.lock @@ -2488,7 +2488,7 @@ requires-dist = [ { name = "humanize" }, { name = "numpy" }, { name = "tskit", specifier = ">=1.0.0" }, - { name = "zarr", specifier = ">=3.1" }, + { name = "zarr", specifier = ">=2.18" }, ] [package.metadata.requires-dev] From bc6812e40954e2075f5cc9708f08c1c89ef36bbd Mon Sep 17 00:00:00 2001 From: Jerome Kelleher Date: Wed, 4 Mar 2026 21:24:55 +0000 Subject: [PATCH 2/3] Add workflow to test v2 compat --- 
.github/workflows/zarr-v2-compat.yml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 .github/workflows/zarr-v2-compat.yml diff --git a/.github/workflows/zarr-v2-compat.yml b/.github/workflows/zarr-v2-compat.yml new file mode 100644 index 0000000..2c77f4d --- /dev/null +++ b/.github/workflows/zarr-v2-compat.yml @@ -0,0 +1,23 @@ +name: zarr v2 compatibility + +on: + pull_request: + push: + branches: [main, test] + +jobs: + test: + name: zarr v2 / Python 3.11 / ubuntu + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: astral-sh/setup-uv@v5 + + - name: Install with zarr v2 + run: | + echo "zarr<3" > zarr-override.txt + uv sync --python 3.11 --group test --override zarr-override.txt + + - name: Run tests + run: uv run pytest From c2b779d46c74564a5b7a33901ad61070a8712951 Mon Sep 17 00:00:00 2001 From: Jerome Kelleher Date: Wed, 4 Mar 2026 21:30:12 +0000 Subject: [PATCH 3/3] Fixup zarr install --- .github/workflows/zarr-v2-compat.yml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/workflows/zarr-v2-compat.yml b/.github/workflows/zarr-v2-compat.yml index 2c77f4d..7b7f22a 100644 --- a/.github/workflows/zarr-v2-compat.yml +++ b/.github/workflows/zarr-v2-compat.yml @@ -14,10 +14,12 @@ jobs: - uses: astral-sh/setup-uv@v5 - - name: Install with zarr v2 - run: | - echo "zarr<3" > zarr-override.txt - uv sync --python 3.11 --group test --override zarr-override.txt + - name: Install dependencies + run: uv sync --python 3.11 --group test + + - name: Downgrade zarr to v2 + run: uv pip install "zarr>=2.18,<3" - name: Run tests - run: uv run pytest + # --no-sync keeps the zarr v2 downgrade; a plain uv run would re-sync the environment to uv.lock + run: uv run --no-sync pytest