Skip to content

Commit f25637d

Browse files
ianhidcherian
andauthored
Support zarr write_empty_chunks for zarr-python 3 and up (#10177)
* bug: fix write_empty_chunks for zarr v3 * future proof write_empty_chunks in append flow * test: fix write_empty_test for zarr 2 * typing: fix typing for write_empty_chunks * small edits --------- Co-authored-by: Deepak Cherian <[email protected]>
1 parent 6167eaa commit f25637d

File tree

2 files changed

+83
-25
lines changed

2 files changed

+83
-25
lines changed

Diff for: xarray/backends/zarr.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -1084,13 +1084,19 @@ def _open_existing_array(self, *, name) -> ZarrArray:
10841084
# - Existing variables already have their attrs included in the consolidated metadata file.
10851085
# - The size of dimensions can not be expanded, that would require a call using `append_dim`
10861086
# which is mutually exclusive with `region`
1087+
empty: dict[str, bool] | dict[str, dict[str, bool]]
1088+
if _zarr_v3():
1089+
empty = dict(config={"write_empty_chunks": self._write_empty})
1090+
else:
1091+
empty = dict(write_empty_chunks=self._write_empty)
1092+
10871093
zarr_array = zarr.open(
10881094
store=(
10891095
self.zarr_group.store if _zarr_v3() else self.zarr_group.chunk_store
10901096
),
10911097
# TODO: see if zarr should normalize these strings.
10921098
path="/".join([self.zarr_group.name.rstrip("/"), name]).lstrip("/"),
1093-
write_empty_chunks=self._write_empty,
1099+
**empty,
10941100
)
10951101
else:
10961102
zarr_array = self.zarr_group[name]
@@ -1115,6 +1121,14 @@ def _create_new_array(
11151121
else:
11161122
encoding["write_empty_chunks"] = self._write_empty
11171123

1124+
if _zarr_v3():
1125+
# zarr v3 deprecated origin and write_empty_chunks
1126+
# instead preferring to pass them via the config argument
1127+
encoding["config"] = {}
1128+
for c in ("write_empty_chunks", "order"):
1129+
if c in encoding:
1130+
encoding["config"][c] = encoding.pop(c)
1131+
11181132
zarr_array = self.zarr_group.create(
11191133
name,
11201134
shape=shape,

Diff for: xarray/tests/test_backends.py

+68-24
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
from collections.abc import Generator, Iterator, Mapping
1818
from contextlib import ExitStack
1919
from io import BytesIO
20-
from os import listdir
2120
from pathlib import Path
2221
from typing import TYPE_CHECKING, Any, Final, Literal, cast
2322
from unittest.mock import patch
@@ -3698,34 +3697,54 @@ def roundtrip_dir(
36983697

36993698
@pytest.mark.parametrize("consolidated", [True, False, None])
37003699
@pytest.mark.parametrize("write_empty", [True, False, None])
3701-
@pytest.mark.skipif(
3702-
has_zarr_v3, reason="zarr-python 3.x removed write_empty_chunks"
3703-
)
37043700
def test_write_empty(
3705-
self, consolidated: bool | None, write_empty: bool | None
3701+
self,
3702+
consolidated: bool | None,
3703+
write_empty: bool | None,
37063704
) -> None:
3707-
if write_empty is False:
3708-
expected = ["0.1.0", "1.1.0"]
3705+
def assert_expected_files(expected: list[str], store: str) -> None:
3706+
"""Convenience for comparing with actual files written"""
3707+
ls = []
3708+
test_root = os.path.join(store, "test")
3709+
for root, _, files in os.walk(test_root):
3710+
ls.extend(
3711+
[
3712+
os.path.join(root, f).removeprefix(test_root).lstrip("/")
3713+
for f in files
3714+
]
3715+
)
3716+
3717+
assert set(expected) == set(
3718+
[
3719+
file.lstrip("c/")
3720+
for file in ls
3721+
if (file not in (".zattrs", ".zarray", "zarr.json"))
3722+
]
3723+
)
3724+
3725+
# The zarr format is set by the `default_zarr_format`
3726+
# pytest fixture that acts on a superclass
3727+
zarr_format_3 = has_zarr_v3 and zarr.config.config["default_zarr_format"] == 3
3728+
if (write_empty is False) or (write_empty is None and has_zarr_v3):
3729+
expected = ["0.1.0"]
37093730
else:
37103731
expected = [
37113732
"0.0.0",
37123733
"0.0.1",
37133734
"0.1.0",
37143735
"0.1.1",
3715-
"1.0.0",
3716-
"1.0.1",
3717-
"1.1.0",
3718-
"1.1.1",
37193736
]
37203737

3721-
ds = xr.Dataset(
3722-
data_vars={
3723-
"test": (
3724-
("Z", "Y", "X"),
3725-
np.array([np.nan, np.nan, 1.0, np.nan]).reshape((1, 2, 2)),
3726-
)
3727-
}
3728-
)
3738+
if zarr_format_3:
3739+
data = np.array([0.0, 0, 1.0, 0]).reshape((1, 2, 2))
3740+
# transform to the path style of zarr 3
3741+
# e.g. 0/0/1
3742+
expected = [e.replace(".", "/") for e in expected]
3743+
else:
3744+
# use nan for default fill_value behaviour
3745+
data = np.array([np.nan, np.nan, 1.0, np.nan]).reshape((1, 2, 2))
3746+
3747+
ds = xr.Dataset(data_vars={"test": (("Z", "Y", "X"), data)})
37293748

37303749
if has_dask:
37313750
ds["test"] = ds["test"].chunk(1)
@@ -3741,17 +3760,42 @@ def test_write_empty(
37413760
write_empty_chunks=write_empty,
37423761
)
37433762

3763+
# check expected files after a write
3764+
assert_expected_files(expected, store)
3765+
37443766
with self.roundtrip_dir(
37453767
ds,
37463768
store,
3747-
{"mode": "a", "append_dim": "Z", "write_empty_chunks": write_empty},
3769+
save_kwargs={
3770+
"mode": "a",
3771+
"append_dim": "Z",
3772+
"write_empty_chunks": write_empty,
3773+
},
37483774
) as a_ds:
37493775
expected_ds = xr.concat([ds, ds], dim="Z")
37503776

3751-
assert_identical(a_ds, expected_ds)
3752-
3753-
ls = listdir(os.path.join(store, "test"))
3754-
assert set(expected) == set([file for file in ls if file[0] != "."])
3777+
assert_identical(a_ds, expected_ds.compute())
3778+
# add the new files we expect to be created by the append
3779+
# that was performed by the roundtrip_dir
3780+
if (write_empty is False) or (write_empty is None and has_zarr_v3):
3781+
expected.append("1.1.0")
3782+
else:
3783+
if not has_zarr_v3:
3784+
# TODO: remove zarr3 if once zarr issue is fixed
3785+
# https://github.com/zarr-developers/zarr-python/issues/2931
3786+
expected.extend(
3787+
[
3788+
"1.1.0",
3789+
"1.0.0",
3790+
"1.0.1",
3791+
"1.1.1",
3792+
]
3793+
)
3794+
else:
3795+
expected.append("1.1.0")
3796+
if zarr_format_3:
3797+
expected = [e.replace(".", "/") for e in expected]
3798+
assert_expected_files(expected, store)
37553799

37563800
def test_avoid_excess_metadata_calls(self) -> None:
37573801
"""Test that chunk requests do not trigger redundant metadata requests.

0 commit comments

Comments
 (0)