From 59eb08fe49fe122cac81e2fd5baf390d8824b577 Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Fri, 9 Aug 2024 13:08:43 +0200 Subject: [PATCH 01/35] OME-Zarr export: Implement with ngff_zarr --- .../operators/ioOperators/opExportSlot.py | 113 ++++++------------ lazyflow/slot.py | 34 ++++++ lazyflow/utility/io_util/write_ome_zarr.py | 93 ++++++++++++++ .../test_io_util/test_write_ome_zarr.py | 46 +++++++ 4 files changed, 212 insertions(+), 74 deletions(-) create mode 100644 lazyflow/utility/io_util/write_ome_zarr.py create mode 100644 tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py diff --git a/lazyflow/operators/ioOperators/opExportSlot.py b/lazyflow/operators/ioOperators/opExportSlot.py index 20295a570..1a8bc9023 100644 --- a/lazyflow/operators/ioOperators/opExportSlot.py +++ b/lazyflow/operators/ioOperators/opExportSlot.py @@ -1,12 +1,7 @@ -from builtins import zip -from builtins import map - -from builtins import object - ############################################################################### # lazyflow: data flow based lazy parallel computation framework # -# Copyright (C) 2011-2014, the ilastik developers +# Copyright (C) 2011-2024, the ilastik developers # # # This program is free software; you can redistribute it and/or @@ -25,7 +20,6 @@ # http://ilastik.org/license/ ############################################################################### import os -import shutil import collections import contextlib from functools import partial @@ -35,7 +29,7 @@ from lazyflow.graph import Operator, InputSlot, OutputSlot from lazyflow.roi import roiFromShape -from lazyflow.utility import OrderedSignal, format_known_keys, PathComponents, mkdir_p +from lazyflow.utility import OrderedSignal, format_known_keys, PathComponents, mkdir_p, isUrl from lazyflow.operators.ioOperators import ( OpH5N5WriterBigDataset, OpStreamingH5N5Reader, @@ -46,6 +40,7 @@ OpExportMultipageTiffSequence, OpExportToArray, ) +from lazyflow.utility.io_util.write_ome_zarr import write_ome_zarr try: from lazyflow.operators.ioOperators import OpExportDvidVolume @@ -84,6 +79,7 @@ class OpExportSlot(Operator): _2d_exts = vigra.impex.listExtensions().split() # List all supported formats + # Only FormatInfo.name is used (to generate help text for a cmd parameter, DataExportApplet) _2d_formats = [FormatInfo(ext, ext, 2, 2) for ext in _2d_exts] _3d_sequence_formats = [FormatInfo(ext + " sequence", ext, 3, 3) for ext in _2d_exts] _3d_volume_formats = [FormatInfo("multipage tiff", "tiff", 3, 3)] @@ -93,11 +89,11 @@ class OpExportSlot(Operator): FormatInfo("compressed hdf5", "h5", 0, 5), FormatInfo("n5", "n5", 0, 5), FormatInfo("compressed n5", "n5", 0, 5), + FormatInfo("OME-Zarr", "zarr", 0, 5), FormatInfo("numpy", "npy", 0, 5), FormatInfo("dvid", "", 2, 5), FormatInfo("blockwise hdf5", "json", 0, 5), ] - ALL_FORMATS = _2d_formats + _3d_sequence_formats + _3d_volume_formats + _4d_sequence_formats + nd_format_formats def __init__(self, *args, **kwargs): @@ -110,6 +106,7 @@ def __init__(self, *args, **kwargs): export_impls["compressed hdf5"] = ("h5", partial(self._export_h5n5, True)) export_impls["n5"] = ("n5", self._export_h5n5) export_impls["compressed n5"] = ("n5", partial(self._export_h5n5, True)) + export_impls["OME-Zarr"] = ("zarr", self._export_ome_zarr) export_impls["numpy"] = ("npy", self._export_npy) export_impls["dvid"] = ("", self._export_dvid) export_impls["blockwise hdf5"] = ("json", self._export_blockwise_hdf5) @@ -151,7 +148,7 @@ def 
_executeExportPath(self, result): path_format += "." + file_extension # Provide the TOTAL path (including dataset name) - if self.OutputFormat.value in ("hdf5", "compressed hdf5", "n5", "compressed n5"): + if self.OutputFormat.value in ("hdf5", "compressed hdf5", "n5", "compressed n5", "OME-Zarr"): path_format += "/" + self.OutputInternalPath.value roi = numpy.array(roiFromShape(self.Input.meta.shape)) @@ -186,7 +183,7 @@ def _get_format_selection_error_msg(self, *args): output_format = self.OutputFormat.value # These cases support all combinations - if output_format in ("hdf5", "compressed hdf5", "n5", "compressed n5", "npy", "blockwise hdf5"): + if output_format in ("hdf5", "compressed hdf5", "n5", "compressed n5", "npy", "blockwise hdf5", "OME-Zarr"): return "" tagged_shape = self.Input.meta.getTaggedShape() @@ -252,10 +249,10 @@ def run_export(self): try: export_func = self._export_impls[output_format][1] except KeyError as e: - raise Exception(f"Unknown export format: {output_format}") from e - else: + raise NotImplementedError(f"Unknown export format: {output_format}") from e + if not isUrl(self.ExportPath.value): mkdir_p(PathComponents(self.ExportPath.value).externalDirectory) - export_func() + export_func() def _export_h5n5(self, compress=False): self.progressSignal(0) @@ -399,12 +396,32 @@ def _export_multipage_tiff_sequence(self): opExport.cleanUp() self.progressSignal(100) + def _export_ome_zarr(self): + self.progressSignal(0) + try: + write_ome_zarr(self.ExportPath.value, self.Input, self.progressSignal) + finally: + self.progressSignal(100) + np = numpy class FormatValidity(object): + ALL_DTYPES = ( + np.uint8, + np.uint16, + np.uint32, + np.uint64, + np.int8, + np.int16, + np.int32, + np.int64, + np.float32, + np.float64, + ) + # { extension : [permitted formats] } dtypes = { "jpg": (np.uint8,), @@ -425,66 +442,12 @@ class FormatValidity(object): "ppm": (np.uint8, np.uint16), "pgm": (np.uint8, np.uint16), "pbm": (np.uint8, np.uint16), # vigra outputs p[gn]m - "numpy": ( - np.uint8, - np.uint16, - np.uint32, - np.uint64, - np.int8, - np.int16, - np.int32, - np.int64, - np.float32, - np.float64, - ), - "hdf5": ( - np.uint8, - np.uint16, - np.uint32, - np.uint64, - np.int8, - np.int16, - np.int32, - np.int64, - np.float32, - np.float64, - ), - "compressed hdf5": ( - np.uint8, - np.uint16, - np.uint32, - np.uint64, - np.int8, - np.int16, - np.int32, - np.int64, - np.float32, - np.float64, - ), - "n5": ( - np.uint8, - np.uint16, - np.uint32, - np.uint64, - np.int8, - np.int16, - np.int32, - np.int64, - np.float32, - np.float64, - ), - "compressed n5": ( - np.uint8, - np.uint16, - np.uint32, - np.uint64, - np.int8, - np.int16, - np.int32, - np.int64, - np.float32, - np.float64, - ), + "numpy": ALL_DTYPES, + "hdf5": ALL_DTYPES, + "compressed hdf5": ALL_DTYPES, + "n5": ALL_DTYPES, + "compressed n5": ALL_DTYPES, + "OME-Zarr": ALL_DTYPES, } # { extension : (min_ndim, max_ndim) } @@ -505,6 +468,7 @@ class FormatValidity(object): "compressed hdf5": (0, 5), "n5": (0, 5), "compressed n5": (0, 5), + "OME-Zarr": (0, 5), } # { extension : [allowed_num_channels] } @@ -525,6 +489,7 @@ class FormatValidity(object): "compressed hdf5": (), # ditto "n5": (), # ditto "compressed n5": (), # ditto + "OME-Zarr": (), } @classmethod diff --git a/lazyflow/slot.py b/lazyflow/slot.py index 8028f92ef..63fdfb98a 100644 --- a/lazyflow/slot.py +++ b/lazyflow/slot.py @@ -1528,3 +1528,37 @@ def __init__(self, *args, **kwargs): super(OutputSlot, self).__init__(*args, **kwargs) self._type = "output" assert 
"optional" not in kwargs, '"optional" init arg cannot be used with OutputSlot' + + +class SlotAsNDArray: + """Adapter class to provide a numpy-like interface to a Slot. + Primarily this means returning data, not Requests, when sliced. + As a consequence, this undoes the lazy-loading behavior of the Slot. + The intended use is to pass the slot to dask, which then handles the lazy-loading.""" + + @property + def ndim(self): + return len(self.shape) + + @property + def dtype(self): + return self.slot.meta.dtype + + @property + def shape(self): + return self.slot.meta.shape + + def __init__(self, slot: Slot): + self.slot = slot + + def __getitem__(self, key): + request = self.slot[key] + return request.wait() + + def __array__(self): + # For typing only: Allows instances to match numpy.typing.ArrayLike + # If necessary, the implementation should probably just be `return self.slot.value` + raise NotImplementedError("Should never be directly converted to an array.") + + def __repr__(self): + return f"{self.__class__.__name__}({self.slot})" diff --git a/lazyflow/utility/io_util/write_ome_zarr.py b/lazyflow/utility/io_util/write_ome_zarr.py new file mode 100644 index 000000000..b02c726f3 --- /dev/null +++ b/lazyflow/utility/io_util/write_ome_zarr.py @@ -0,0 +1,93 @@ +import logging +from typing import List, Literal + +import ngff_zarr +import numpy + +from lazyflow.operators import OpReorderAxes +from lazyflow.roi import determineBlockShape, roiToSlice, roiFromShape +from lazyflow.slot import Slot, SlotAsNDArray +from lazyflow.utility import BigRequestStreamer, OrderedSignal, PathComponents +from zarr.storage import FSStore + +from lazyflow.utility.io_util.OMEZarrStore import OME_ZARR_V_0_4_KWARGS + +logger = logging.getLogger(__name__) + + +def write_ome_zarr(export_path: str, image_source_slot: Slot, progress_signal: OrderedSignal): + export_path = PathComponents(export_path) + op_reorder = OpReorderAxes(parent=image_source_slot.operator) + op_reorder.AxisOrder.setValue("tczyx") # OME-Zarr convention + try: + op_reorder.Input.connect(image_source_slot) + dims: List[Literal["t", "c", "z", "y", "x"]] = list(op_reorder.Output.meta.axistags.keys()) + scale = {k: 1.0 for k in dims} + translation = {k: 0.0 for k in dims} + image_source = SlotAsNDArray(op_reorder.Output) + image = ngff_zarr.to_ngff_image(image_source, dims=dims, scale=scale, translation=translation) + progress_signal(50) + + multiscales = ngff_zarr.to_multiscales(image, scale_factors=2, chunks=64) + store = FSStore(export_path.externalPath, mode="w", **OME_ZARR_V_0_4_KWARGS) + ngff_zarr.to_ngff_zarr(store, multiscales) + print(export_path.externalPath) + print(multiscales) + finally: + op_reorder.cleanUp() + return + # h5N5GroupName, datasetName = os.path.split(h5N5Path) + # if h5N5GroupName == "": + # g = self.f + # else: + # if h5N5GroupName in self.f: + # g = self.f[h5N5GroupName] + # else: + # g = self.f.create_group(h5N5GroupName) + + data_shape = image_source_slot.meta.shape + logger.info(f"Data shape: {data_shape}") + + dtype = image_source_slot.meta.dtype + if isinstance(dtype, numpy.dtype): + # Make sure we're dealing with a type (e.g. numpy.float64), + # not a numpy.dtype + dtype = dtype.type + # Set up our chunk shape: Aim for a cube that's roughly 512k in size + dtypeBytes = dtype().nbytes + + tagged_maxshape = image_source_slot.meta.getTaggedShape() + if "t" in tagged_maxshape: + # Assume that chunks should not span multiple t-slices, + # and channels are often handled separately, too. 
+ tagged_maxshape["t"] = 1 + + if "c" in tagged_maxshape: + tagged_maxshape["c"] = 1 + + chunkShape = determineBlockShape(list(tagged_maxshape.values()), 512_000.0 / dtypeBytes) + + # if datasetName in list(g.keys()): + # del g[datasetName] + kwargs = {"shape": data_shape, "dtype": dtype, "chunks": chunkShape} + + # self.d = g.create_dataset(datasetName, **kwargs) + + progress_signal(0) + + display_mode = image_source_slot.meta.display_mode + axistags = image_source_slot.meta.axistags.toJSON() + neuroglancer_axes = "".join(tag.key for tag in image_source_slot.meta.axistags)[::-1] + drange = image_source_slot.meta.get("drange") + + def handle_block_result(roi, data): + slicing = roiToSlice(*roi) + if data.flags.c_contiguous: + self.d.write_direct(data.view(numpy.ndarray), dest_sel=slicing) + else: + self.d[slicing] = data + + requester = BigRequestStreamer(image_source_slot, roiFromShape(data_shape)) + requester.resultSignal.subscribe(handle_block_result) + requester.progressSignal.subscribe(progress_signal) + requester.execute() diff --git a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py new file mode 100644 index 000000000..39be8a3c7 --- /dev/null +++ b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py @@ -0,0 +1,46 @@ +from unittest import mock + +import numpy +import pytest +import vigra +import zarr + +from lazyflow.operators import OpArrayPiper +from lazyflow.utility.io_util.write_ome_zarr import write_ome_zarr + + +@pytest.fixture(params=["ilastik default order", "2d", "3d", "2dc"]) +def data_array(request) -> vigra.VigraArray: + shapes = { + "ilastik default order": (1, 128, 128, 10, 1), + "2d": (128, 128), + "3d": (10, 128, 128), + "2dc": (128, 128, 3), + } + axis_order = { + "ilastik default order": "txyzc", + "2d": "yx", + "3d": "zyx", + "2dc": "yxc", + } + shape = shapes[request.param] + data = vigra.VigraArray(shape, axistags=vigra.defaultAxistags(axis_order[request.param])) + data[...] 
= numpy.indices(shape).sum(0) + return data + + +def test_write_new_ome_zarr_on_disc(tmp_path, graph, data_array): + export_path = tmp_path / "test.zarr" + source_op = OpArrayPiper(graph=graph) + source_op.Input.setValue(data_array) + progress = mock.Mock() + write_ome_zarr(str(export_path), source_op.Output, progress) + + assert export_path.exists() + store = zarr.open(str(export_path)) + assert "multiscales" in store.attrs + m = store.attrs["multiscales"][0] + assert all(key in m for key in ("datasets", "axes", "version")) + assert m["version"] == "0.4" + assert [a["name"] for a in m["axes"]] == ["t", "c", "z", "y", "x"] + assert all(dataset["path"] in store for dataset in m["datasets"]) From 03f6631a4dd6b269635e321705afa4822089a9dd Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Tue, 20 Aug 2024 14:04:09 +0200 Subject: [PATCH 02/35] OME-Zarr export: Implement and test metadata --- lazyflow/utility/io_util/write_ome_zarr.py | 125 ++++++++---------- .../test_io_util/test_write_ome_zarr.py | 49 +++++-- 2 files changed, 95 insertions(+), 79 deletions(-) diff --git a/lazyflow/utility/io_util/write_ome_zarr.py b/lazyflow/utility/io_util/write_ome_zarr.py index b02c726f3..2d5eeca59 100644 --- a/lazyflow/utility/io_util/write_ome_zarr.py +++ b/lazyflow/utility/io_util/write_ome_zarr.py @@ -1,93 +1,76 @@ import logging -from typing import List, Literal +from typing import List, Literal, Optional, Tuple import ngff_zarr import numpy +import zarr +from zarr.storage import FSStore from lazyflow.operators import OpReorderAxes -from lazyflow.roi import determineBlockShape, roiToSlice, roiFromShape +from lazyflow.roi import determineBlockShape from lazyflow.slot import Slot, SlotAsNDArray -from lazyflow.utility import BigRequestStreamer, OrderedSignal, PathComponents -from zarr.storage import FSStore - +from lazyflow.utility import OrderedSignal, PathComponents from lazyflow.utility.io_util.OMEZarrStore import OME_ZARR_V_0_4_KWARGS logger = logging.getLogger(__name__) -def write_ome_zarr(export_path: str, image_source_slot: Slot, progress_signal: OrderedSignal): - export_path = PathComponents(export_path) +def _get_chunk_shape(image_source_slot) -> Tuple[int, ...]: + """Determine chunk shape for OME-Zarr storage based on image source slot. 
+ Chunk size is 1 for t and c, and determined by ilastik default rules for zyx, with a target of 512KB per chunk.""" + dtype = image_source_slot.meta.dtype + if isinstance(dtype, numpy.dtype): # Extract raw type class + dtype = dtype.type + dtype_bytes = dtype().nbytes + tagged_maxshape = image_source_slot.meta.getTaggedShape() + tagged_maxshape["t"] = 1 + tagged_maxshape["c"] = 1 + chunk_shape = determineBlockShape(list(tagged_maxshape.values()), 512_000.0 / dtype_bytes) + return chunk_shape + + +def write_ome_zarr( + export_path: str, + image_source_slot: Slot, + progress_signal: OrderedSignal, + downscale_method: Optional[ngff_zarr.methods.Methods] = None, +): + pc = PathComponents(export_path) + external_path = pc.externalPath + internal_path = pc.internalPath + op_reorder = OpReorderAxes(parent=image_source_slot.operator) op_reorder.AxisOrder.setValue("tczyx") # OME-Zarr convention try: op_reorder.Input.connect(image_source_slot) + image_source = SlotAsNDArray(op_reorder.Output) + chunk_shape = _get_chunk_shape(op_reorder.Output) dims: List[Literal["t", "c", "z", "y", "x"]] = list(op_reorder.Output.meta.axistags.keys()) scale = {k: 1.0 for k in dims} translation = {k: 0.0 for k in dims} - image_source = SlotAsNDArray(op_reorder.Output) - image = ngff_zarr.to_ngff_image(image_source, dims=dims, scale=scale, translation=translation) + image = ngff_zarr.to_ngff_image( + image_source, name=internal_path or "image", dims=dims, scale=scale, translation=translation + ) + progress_signal(25) + multiscales = ngff_zarr.to_multiscales(image, scale_factors=2, chunks=chunk_shape, method=downscale_method) progress_signal(50) - - multiscales = ngff_zarr.to_multiscales(image, scale_factors=2, chunks=64) - store = FSStore(export_path.externalPath, mode="w", **OME_ZARR_V_0_4_KWARGS) - ngff_zarr.to_ngff_zarr(store, multiscales) - print(export_path.externalPath) - print(multiscales) + store = FSStore(external_path, mode="w", **OME_ZARR_V_0_4_KWARGS) + ngff_zarr.to_ngff_zarr(store, multiscales, overwrite=False) + # Write ilastik metadata + for image in multiscales.images: + # ngff-zarr does not record the storage path in the image object, so we have to look it up. + # The only way we can know that a metadata entry corresponds to this image is the scale factor. + dataset = None + for d in multiscales.metadata.datasets: + scale_transforms = [t for t in d.coordinateTransformations if t.type == "scale"] + dataset_scale_factors = scale_transforms[0].scale # Should only be one + if all(image_scale_factor in dataset_scale_factors for image_scale_factor in image.scale.values()): + dataset = d + break + assert dataset is not None, f"Could not find metadata for image, must be an error in to_ngff_zarr. {image=}" + za = zarr.Array(store, path=dataset.path) + za.attrs["axistags"] = op_reorder.Output.meta.axistags.toJSON() + za.attrs["display_mode"] = image_source_slot.meta.display_mode + za.attrs["drange"] = image_source_slot.meta.get("drange") finally: op_reorder.cleanUp() - return - # h5N5GroupName, datasetName = os.path.split(h5N5Path) - # if h5N5GroupName == "": - # g = self.f - # else: - # if h5N5GroupName in self.f: - # g = self.f[h5N5GroupName] - # else: - # g = self.f.create_group(h5N5GroupName) - - data_shape = image_source_slot.meta.shape - logger.info(f"Data shape: {data_shape}") - - dtype = image_source_slot.meta.dtype - if isinstance(dtype, numpy.dtype): - # Make sure we're dealing with a type (e.g. 
numpy.float64), - # not a numpy.dtype - dtype = dtype.type - # Set up our chunk shape: Aim for a cube that's roughly 512k in size - dtypeBytes = dtype().nbytes - - tagged_maxshape = image_source_slot.meta.getTaggedShape() - if "t" in tagged_maxshape: - # Assume that chunks should not span multiple t-slices, - # and channels are often handled separately, too. - tagged_maxshape["t"] = 1 - - if "c" in tagged_maxshape: - tagged_maxshape["c"] = 1 - - chunkShape = determineBlockShape(list(tagged_maxshape.values()), 512_000.0 / dtypeBytes) - - # if datasetName in list(g.keys()): - # del g[datasetName] - kwargs = {"shape": data_shape, "dtype": dtype, "chunks": chunkShape} - - # self.d = g.create_dataset(datasetName, **kwargs) - - progress_signal(0) - - display_mode = image_source_slot.meta.display_mode - axistags = image_source_slot.meta.axistags.toJSON() - neuroglancer_axes = "".join(tag.key for tag in image_source_slot.meta.axistags)[::-1] - drange = image_source_slot.meta.get("drange") - - def handle_block_result(roi, data): - slicing = roiToSlice(*roi) - if data.flags.c_contiguous: - self.d.write_direct(data.view(numpy.ndarray), dest_sel=slicing) - else: - self.d[slicing] = data - - requester = BigRequestStreamer(image_source_slot, roiFromShape(data_shape)) - requester.resultSignal.subscribe(handle_block_result) - requester.progressSignal.subscribe(progress_signal) - requester.execute() diff --git a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py index 39be8a3c7..c8b124225 100644 --- a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py +++ b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py @@ -1,3 +1,4 @@ +import math from unittest import mock import numpy @@ -6,22 +7,33 @@ import zarr from lazyflow.operators import OpArrayPiper +from lazyflow.utility import Timer from lazyflow.utility.io_util.write_ome_zarr import write_ome_zarr -@pytest.fixture(params=["ilastik default order", "2d", "3d", "2dc"]) +@pytest.fixture(params=["125MiB"]) def data_array(request) -> vigra.VigraArray: shapes = { - "ilastik default order": (1, 128, 128, 10, 1), - "2d": (128, 128), - "3d": (10, 128, 128), - "2dc": (128, 128, 3), + "ilastik default order": (1, 128, 127, 10, 1), + "2d": (256, 255), + "3d": (10, 126, 125), + "2dc": (124, 123, 3), + "125MiB": (30, 1024, 1024), # 4 bytes per pixel + "500MiB": (125, 1024, 1024), # 4 bytes per pixel + "1GiB": (256, 1024, 1024), + "3GiB": (768, 1024, 1024), + "6GiB": (768, 2048, 1024), } axis_order = { "ilastik default order": "txyzc", "2d": "yx", "3d": "zyx", "2dc": "yxc", + "125MiB": "zyx", + "500MiB": "zyx", + "1GiB": "zyx", + "3GiB": "zyx", + "6GiB": "zyx", } shape = shapes[request.param] data = vigra.VigraArray(shape, axistags=vigra.defaultAxistags(axis_order[request.param])) @@ -34,13 +46,34 @@ def test_write_new_ome_zarr_on_disc(tmp_path, graph, data_array): source_op = OpArrayPiper(graph=graph) source_op.Input.setValue(data_array) progress = mock.Mock() - write_ome_zarr(str(export_path), source_op.Output, progress) + with Timer() as timer: + write_ome_zarr(str(export_path), source_op.Output, progress) + duration = timer.seconds() + # Manual benchmarking + raw_size = math.prod(data_array.shape) * data_array.dtype.type().nbytes + print(";" f"{data_array.shape};" f"{data_array.dtype};" f"{raw_size};" f"{duration};" f"{duration / raw_size};") + + expected_axiskeys = "tczyx" assert export_path.exists() store = zarr.open(str(export_path)) assert 
"multiscales" in store.attrs m = store.attrs["multiscales"][0] assert all(key in m for key in ("datasets", "axes", "version")) assert m["version"] == "0.4" - assert [a["name"] for a in m["axes"]] == ["t", "c", "z", "y", "x"] - assert all(dataset["path"] in store for dataset in m["datasets"]) + assert [a["name"] for a in m["axes"]] == list(expected_axiskeys) + tagged_shape = dict(zip(data_array.axistags.keys(), data_array.shape)) + original_shape_reordered = [tagged_shape[a] if a in tagged_shape else 1 for a in expected_axiskeys] + + discovered_keys = [] + for i, dataset in enumerate(m["datasets"]): + assert dataset["path"] in store + discovered_keys.append(dataset["path"]) + written_array = store[dataset["path"]] + assert "axistags" in written_array.attrs, f"no axistags for {dataset['path']}" + assert vigra.AxisTags.fromJSON(written_array.attrs["axistags"]) == vigra.defaultAxistags(expected_axiskeys) + reported_scaling = dataset["coordinateTransformations"][0]["scale"] + expected_shape = tuple(numpy.array(original_shape_reordered) / numpy.array(reported_scaling)) + assert written_array.shape == expected_shape + assert numpy.count_nonzero(written_array) > numpy.prod(expected_shape) / 2, "did not write actual data" + assert all([key in discovered_keys for key in store.keys()]), "store contains undocumented subpaths" From 881f1f73986f2cd6e7f776095df1ff8f715b40d2 Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Fri, 23 Aug 2024 11:54:09 +0200 Subject: [PATCH 03/35] OME-Zarr export: Raw base implementation (drop ngff-zarr) Actual scaling tbd --- lazyflow/slot.py | 34 ---- lazyflow/utility/io_util/write_ome_zarr.py | 149 +++++++++++++----- .../test_io_util/test_write_ome_zarr.py | 21 +-- 3 files changed, 122 insertions(+), 82 deletions(-) diff --git a/lazyflow/slot.py b/lazyflow/slot.py index 63fdfb98a..8028f92ef 100644 --- a/lazyflow/slot.py +++ b/lazyflow/slot.py @@ -1528,37 +1528,3 @@ def __init__(self, *args, **kwargs): super(OutputSlot, self).__init__(*args, **kwargs) self._type = "output" assert "optional" not in kwargs, '"optional" init arg cannot be used with OutputSlot' - - -class SlotAsNDArray: - """Adapter class to provide a numpy-like interface to a Slot. - Primarily this means returning data, not Requests, when sliced. - As a consequence, this undoes the lazy-loading behavior of the Slot. 
- The intended use is to pass the slot to dask, which then handles the lazy-loading.""" - - @property - def ndim(self): - return len(self.shape) - - @property - def dtype(self): - return self.slot.meta.dtype - - @property - def shape(self): - return self.slot.meta.shape - - def __init__(self, slot: Slot): - self.slot = slot - - def __getitem__(self, key): - request = self.slot[key] - return request.wait() - - def __array__(self): - # For typing only: Allows instances to match numpy.typing.ArrayLike - # If necessary, the implementation should probably just be `return self.slot.value` - raise NotImplementedError("Should never be directly converted to an array.") - - def __repr__(self): - return f"{self.__class__.__name__}({self.slot})" diff --git a/lazyflow/utility/io_util/write_ome_zarr.py b/lazyflow/utility/io_util/write_ome_zarr.py index 2d5eeca59..296d43dfe 100644 --- a/lazyflow/utility/io_util/write_ome_zarr.py +++ b/lazyflow/utility/io_util/write_ome_zarr.py @@ -1,20 +1,29 @@ +import dataclasses import logging -from typing import List, Literal, Optional, Tuple +from functools import partial +from typing import List, Tuple, Dict -import ngff_zarr import numpy import zarr from zarr.storage import FSStore +from ilastik import __version__ as ilastik_version from lazyflow.operators import OpReorderAxes -from lazyflow.roi import determineBlockShape -from lazyflow.slot import Slot, SlotAsNDArray -from lazyflow.utility import OrderedSignal, PathComponents +from lazyflow.roi import determineBlockShape, roiFromShape, roiToSlice +from lazyflow.slot import Slot +from lazyflow.utility import OrderedSignal, PathComponents, BigRequestStreamer from lazyflow.utility.io_util.OMEZarrStore import OME_ZARR_V_0_4_KWARGS logger = logging.getLogger(__name__) +@dataclasses.dataclass +class ImageMetadata: + path: str + scale: Dict[str, float] + translation: Dict[str, float] + + def _get_chunk_shape(image_source_slot) -> Tuple[int, ...]: """Determine chunk shape for OME-Zarr storage based on image source slot. Chunk size is 1 for t and c, and determined by ilastik default rules for zyx, with a target of 512KB per chunk.""" @@ -25,52 +34,114 @@ def _get_chunk_shape(image_source_slot) -> Tuple[int, ...]: tagged_maxshape = image_source_slot.meta.getTaggedShape() tagged_maxshape["t"] = 1 tagged_maxshape["c"] = 1 - chunk_shape = determineBlockShape(list(tagged_maxshape.values()), 512_000.0 / dtype_bytes) + chunk_shape = determineBlockShape(list(tagged_maxshape.values()), 512_000.0 / dtype_bytes) # 512KB chunk size return chunk_shape -def write_ome_zarr( +def _get_scalings(image_source_slot, chunk_shape: Tuple[int, ...]) -> List[Dict[str, float]]: + """ + Computes scaling factors in the OME-Zarr sense. + Downscaling is done by a factor of 2 in all spatial dimensions until one dimension is smaller than half its chunk size. + Returns list of scaling factors by axis, starting with original scale. + Scaling is meant as a factor of the pixel unit, i.e. if axis is in nm, factor 2.0 means 2nm. + When applied to pixel shape, this means the factor is a divisor (scaled shape = original shape // factor). 
+ """ + # Until ilastik handles pixel units, original scale is 1px + spatial = ["z", "y", "x"] + original_scale = {a: 1.0 for a in image_source_slot.meta.getAxisKeys()} + return [original_scale] # [{"z": 1., "y": 2., "x": 2.}, {"z": 1., "y": 4., "x": 4.}] + + +def _compute_and_write_scales( export_path: str, - image_source_slot: Slot, + image_source_slot, progress_signal: OrderedSignal, - downscale_method: Optional[ngff_zarr.methods.Methods] = None, -): +) -> List[ImageMetadata]: pc = PathComponents(export_path) external_path = pc.externalPath internal_path = pc.internalPath + store = FSStore(external_path, mode="w", **OME_ZARR_V_0_4_KWARGS) + chunk_shape = _get_chunk_shape(image_source_slot) + scalings = _get_scalings(image_source_slot, chunk_shape) + meta = [] + + for i, scaling in enumerate(scalings): + scale_path = f"{internal_path}/s{i}" if internal_path else f"s{i}" + scaled_shape = ( + int(s // scaling[a]) if a in scaling else s for a, s in image_source_slot.meta.getTaggedShape().items() + ) + zarray = zarr.creation.empty( + scaled_shape, store=store, path=scale_path, chunks=chunk_shape, dtype=image_source_slot.meta.dtype + ) + + def scale_and_write_block(scale_index, scaling_, zarray_, roi, data): + if scale_index > 0: + logger.info(f"Scale {scale_index}: Applying {scaling_=} to {roi=}") + slicing = roiToSlice(*roi) + logger.info(f"Scale {scale_index}: Writing to {slicing=}: {data=}") + zarray_[slicing] = data + + requester = BigRequestStreamer(image_source_slot, roiFromShape(image_source_slot.meta.shape)) + requester.resultSignal.subscribe(partial(scale_and_write_block, i, scaling, zarray)) + requester.progressSignal.subscribe(progress_signal) + requester.execute() + + meta.append(ImageMetadata(scale_path, scaling, {})) + + return meta + +def _write_ome_zarr_and_ilastik_metadata( + export_path: str, multiscale_metadata: List[ImageMetadata], ilastik_meta: Dict +): + pc = PathComponents(export_path) + external_path = pc.externalPath + multiscale_name = pc.internalPath + ilastik_signature = {"name": "ilastik", "version": ilastik_version, "ome_zarr_exporter_version": 1} + axis_types = {"t": "time", "c": "channel", "z": "space", "y": "space", "x": "space"} + axes = [{"name": tag.key, "type": axis_types[tag.key]} for tag in ilastik_meta["axistags"]] + datasets = [ + { + "path": image.path, + "coordinateTransformations": [ + {"type": "scale", "scale": [image.scale[tag.key] for tag in ilastik_meta["axistags"]]} + ], + } + for image in multiscale_metadata + ] + store = FSStore(external_path, mode="w", **OME_ZARR_V_0_4_KWARGS) + root = zarr.group(store, overwrite=False) + root.attrs["multiscales"] = [ + {"_creator": ilastik_signature, "version": "0.4", "name": multiscale_name, "axes": axes, "datasets": datasets} + ] + for image in multiscale_metadata: + za = zarr.Array(store, path=image.path) + za.attrs["axistags"] = ilastik_meta["axistags"].toJSON() + za.attrs["display_mode"] = ilastik_meta["display_mode"] + za.attrs["drange"] = ilastik_meta.get("drange") + + +def write_ome_zarr( + export_path: str, + image_source_slot: Slot, + progress_signal: OrderedSignal, +): op_reorder = OpReorderAxes(parent=image_source_slot.operator) - op_reorder.AxisOrder.setValue("tczyx") # OME-Zarr convention + op_reorder.AxisOrder.setValue("tczyx") try: op_reorder.Input.connect(image_source_slot) - image_source = SlotAsNDArray(op_reorder.Output) - chunk_shape = _get_chunk_shape(op_reorder.Output) - dims: List[Literal["t", "c", "z", "y", "x"]] = list(op_reorder.Output.meta.axistags.keys()) - scale = {k: 1.0 for k 
in dims} - translation = {k: 0.0 for k in dims} - image = ngff_zarr.to_ngff_image( - image_source, name=internal_path or "image", dims=dims, scale=scale, translation=translation - ) + image_source = op_reorder.Output progress_signal(25) - multiscales = ngff_zarr.to_multiscales(image, scale_factors=2, chunks=chunk_shape, method=downscale_method) - progress_signal(50) - store = FSStore(external_path, mode="w", **OME_ZARR_V_0_4_KWARGS) - ngff_zarr.to_ngff_zarr(store, multiscales, overwrite=False) - # Write ilastik metadata - for image in multiscales.images: - # ngff-zarr does not record the storage path in the image object, so we have to look it up. - # The only way we can know that a metadata entry corresponds to this image is the scale factor. - dataset = None - for d in multiscales.metadata.datasets: - scale_transforms = [t for t in d.coordinateTransformations if t.type == "scale"] - dataset_scale_factors = scale_transforms[0].scale # Should only be one - if all(image_scale_factor in dataset_scale_factors for image_scale_factor in image.scale.values()): - dataset = d - break - assert dataset is not None, f"Could not find metadata for image, must be an error in to_ngff_zarr. {image=}" - za = zarr.Array(store, path=dataset.path) - za.attrs["axistags"] = op_reorder.Output.meta.axistags.toJSON() - za.attrs["display_mode"] = image_source_slot.meta.display_mode - za.attrs["drange"] = image_source_slot.meta.get("drange") + ome_zarr_meta = _compute_and_write_scales(export_path, image_source, progress_signal) + progress_signal(95) + _write_ome_zarr_and_ilastik_metadata( + export_path, + ome_zarr_meta, + { + "axistags": op_reorder.Output.meta.axistags, + "display_mode": image_source_slot.meta.display_mode, + "drange": image_source_slot.meta.get("drange"), + }, + ) finally: op_reorder.cleanUp() diff --git a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py index c8b124225..f5f28ee9c 100644 --- a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py +++ b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py @@ -11,7 +11,7 @@ from lazyflow.utility.io_util.write_ome_zarr import write_ome_zarr -@pytest.fixture(params=["125MiB"]) +@pytest.fixture(params=["ilastik default order", "2d", "3d", "2dc", "125MiB"]) def data_array(request) -> vigra.VigraArray: shapes = { "ilastik default order": (1, 128, 127, 10, 1), @@ -41,7 +41,7 @@ def data_array(request) -> vigra.VigraArray: return data -def test_write_new_ome_zarr_on_disc(tmp_path, graph, data_array): +def test_metadata_integrity(tmp_path, graph, data_array): export_path = tmp_path / "test.zarr" source_op = OpArrayPiper(graph=graph) source_op.Input.setValue(data_array) @@ -58,22 +58,25 @@ def test_write_new_ome_zarr_on_disc(tmp_path, graph, data_array): assert export_path.exists() store = zarr.open(str(export_path)) assert "multiscales" in store.attrs - m = store.attrs["multiscales"][0] - assert all(key in m for key in ("datasets", "axes", "version")) - assert m["version"] == "0.4" - assert [a["name"] for a in m["axes"]] == list(expected_axiskeys) + written_meta = store.attrs["multiscales"][0] + assert all(key in written_meta for key in ("datasets", "axes", "version")) + assert written_meta["version"] == "0.4" + assert [a["name"] for a in written_meta["axes"]] == list(expected_axiskeys) tagged_shape = dict(zip(data_array.axistags.keys(), data_array.shape)) original_shape_reordered = [tagged_shape[a] if a in tagged_shape else 1 for a in 
expected_axiskeys] discovered_keys = [] - for i, dataset in enumerate(m["datasets"]): + for dataset in written_meta["datasets"]: assert dataset["path"] in store discovered_keys.append(dataset["path"]) written_array = store[dataset["path"]] assert "axistags" in written_array.attrs, f"no axistags for {dataset['path']}" assert vigra.AxisTags.fromJSON(written_array.attrs["axistags"]) == vigra.defaultAxistags(expected_axiskeys) - reported_scaling = dataset["coordinateTransformations"][0]["scale"] - expected_shape = tuple(numpy.array(original_shape_reordered) / numpy.array(reported_scaling)) + reported_scalings = [ + transform for transform in dataset["coordinateTransformations"] if transform["type"] == "scale" + ] + assert len(reported_scalings) == 1 + expected_shape = tuple(numpy.array(original_shape_reordered) // numpy.array(reported_scalings[0]["scale"])) assert written_array.shape == expected_shape assert numpy.count_nonzero(written_array) > numpy.prod(expected_shape) / 2, "did not write actual data" assert all([key in discovered_keys for key in store.keys()]), "store contains undocumented subpaths" From 58b15f84f40cd19acc13400fa17bb3713a88c201 Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Fri, 23 Aug 2024 13:10:05 +0200 Subject: [PATCH 04/35] OME-Zarr export: Do not write None metadata --- lazyflow/utility/io_util/write_ome_zarr.py | 15 +++++++++------ .../test_io_util/test_write_ome_zarr.py | 4 +++- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/lazyflow/utility/io_util/write_ome_zarr.py b/lazyflow/utility/io_util/write_ome_zarr.py index 296d43dfe..bff02e689 100644 --- a/lazyflow/utility/io_util/write_ome_zarr.py +++ b/lazyflow/utility/io_util/write_ome_zarr.py @@ -109,16 +109,19 @@ def _write_ome_zarr_and_ilastik_metadata( } for image in multiscale_metadata ] + ome_zarr_multiscale_meta = {"_creator": ilastik_signature, "version": "0.4", "axes": axes, "datasets": datasets} + if multiscale_name: + ome_zarr_multiscale_meta["name"] = multiscale_name store = FSStore(external_path, mode="w", **OME_ZARR_V_0_4_KWARGS) root = zarr.group(store, overwrite=False) - root.attrs["multiscales"] = [ - {"_creator": ilastik_signature, "version": "0.4", "name": multiscale_name, "axes": axes, "datasets": datasets} - ] + root.attrs["multiscales"] = [ome_zarr_multiscale_meta] for image in multiscale_metadata: za = zarr.Array(store, path=image.path) za.attrs["axistags"] = ilastik_meta["axistags"].toJSON() - za.attrs["display_mode"] = ilastik_meta["display_mode"] - za.attrs["drange"] = ilastik_meta.get("drange") + if ilastik_meta["display_mode"]: + za.attrs["display_mode"] = ilastik_meta["display_mode"] + if ilastik_meta["drange"]: + za.attrs["drange"] = ilastik_meta["drange"] def write_ome_zarr( @@ -139,7 +142,7 @@ def write_ome_zarr( ome_zarr_meta, { "axistags": op_reorder.Output.meta.axistags, - "display_mode": image_source_slot.meta.display_mode, + "display_mode": image_source_slot.meta.get("display_mode"), "drange": image_source_slot.meta.get("drange"), }, ) diff --git a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py index f5f28ee9c..d8cc0a546 100644 --- a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py +++ b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py @@ -59,7 +59,8 @@ def test_metadata_integrity(tmp_path, graph, data_array): store = zarr.open(str(export_path)) assert "multiscales" in store.attrs 
written_meta = store.attrs["multiscales"][0] - assert all(key in written_meta for key in ("datasets", "axes", "version")) + assert all([key in written_meta for key in ("datasets", "axes", "version")]) # Keys required by spec + assert all([value is not None for value in written_meta.values()]) # Should not write None anywhere assert written_meta["version"] == "0.4" assert [a["name"] for a in written_meta["axes"]] == list(expected_axiskeys) tagged_shape = dict(zip(data_array.axistags.keys(), data_array.shape)) @@ -72,6 +73,7 @@ def test_metadata_integrity(tmp_path, graph, data_array): written_array = store[dataset["path"]] assert "axistags" in written_array.attrs, f"no axistags for {dataset['path']}" assert vigra.AxisTags.fromJSON(written_array.attrs["axistags"]) == vigra.defaultAxistags(expected_axiskeys) + assert all([value is not None for value in written_array.attrs.values()]) # Should not write None anywhere reported_scalings = [ transform for transform in dataset["coordinateTransformations"] if transform["type"] == "scale" ] From ff33490ddd69156247781b4f0c6a9b7eb32d8771 Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Mon, 26 Aug 2024 15:44:19 +0200 Subject: [PATCH 05/35] OME-Zarr export: Implement scaling factor calculation --- lazyflow/utility/io_util/write_ome_zarr.py | 73 ++++++++++++++----- .../test_io_util/test_write_ome_zarr.py | 54 +++++++++++++- 2 files changed, 107 insertions(+), 20 deletions(-) diff --git a/lazyflow/utility/io_util/write_ome_zarr.py b/lazyflow/utility/io_util/write_ome_zarr.py index bff02e689..3a8f99ba3 100644 --- a/lazyflow/utility/io_util/write_ome_zarr.py +++ b/lazyflow/utility/io_util/write_ome_zarr.py @@ -1,7 +1,7 @@ import dataclasses import logging from functools import partial -from typing import List, Tuple, Dict +from typing import List, Tuple, Dict, Optional, OrderedDict import numpy import zarr @@ -24,7 +24,7 @@ class ImageMetadata: translation: Dict[str, float] -def _get_chunk_shape(image_source_slot) -> Tuple[int, ...]: +def _get_chunk_shape(image_source_slot: Slot) -> Tuple[int, ...]: """Determine chunk shape for OME-Zarr storage based on image source slot. Chunk size is 1 for t and c, and determined by ilastik default rules for zyx, with a target of 512KB per chunk.""" dtype = image_source_slot.meta.dtype @@ -38,38 +38,72 @@ def _get_chunk_shape(image_source_slot) -> Tuple[int, ...]: return chunk_shape -def _get_scalings(image_source_slot, chunk_shape: Tuple[int, ...]) -> List[Dict[str, float]]: +def _get_scalings( + original_tagged_shape: OrderedDict[str, int], chunk_shape: Tuple[int, ...], min_length: Optional[int] +) -> List[Dict[str, float]]: """ - Computes scaling factors in the OME-Zarr sense. - Downscaling is done by a factor of 2 in all spatial dimensions until one dimension is smaller than half its chunk size. - Returns list of scaling factors by axis, starting with original scale. - Scaling is meant as a factor of the pixel unit, i.e. if axis is in nm, factor 2.0 means 2nm. - When applied to pixel shape, this means the factor is a divisor (scaled shape = original shape // factor). + Computes scaling "factors". + Technically they are divisors for the shape (factor 2.0 means half the shape). + Downscaling is done by a factor of 2 in all spatial dimensions until: + - the dataset would be less than 4 x chunk size (2MiB) + - an axis that started non-singleton would become singleton + - the largest axis would be smaller than min_length (if defined). 
+ Returns list of scaling factor dicts by axis, starting with original scale. + The scaling level that meets one of the exit conditions is excluded. + Raises if more than 20 scales are computed (sanity). """ - # Until ilastik handles pixel units, original scale is 1px + assert len(chunk_shape) == len(original_tagged_shape), "Chunk shape and tagged shape must have same length" spatial = ["z", "y", "x"] - original_scale = {a: 1.0 for a in image_source_slot.meta.getAxisKeys()} - return [original_scale] # [{"z": 1., "y": 2., "x": 2.}, {"z": 1., "y": 4., "x": 4.}] + original_scale = {a: 1.0 for a in original_tagged_shape.keys()} + scalings = [original_scale] + sanity_limit = 20 + for i in range(sanity_limit): + if i == sanity_limit: + raise ValueError(f"Too many scales computed, limit={sanity_limit}. Please report this to the developers.") + previous_scaling = scalings[-1] + new_scaling = { + a: s * 2.0 if a in spatial and original_tagged_shape[a] > 1 else 1.0 for a, s in previous_scaling.items() + } + new_shape = _get_scaled_slot_shape(original_tagged_shape, new_scaling) + if ( + _is_less_than_4_chunks(new_shape, chunk_shape) + or _reduces_any_axis_to_singleton(new_shape, tuple(original_tagged_shape.values())) + or (min_length and max(new_shape) < min_length) + ): + break + scalings.append(new_scaling) + return scalings + + +def _reduces_any_axis_to_singleton(new_shape: Tuple[int, ...], original_shape: Tuple[int, ...]): + return any(new <= 1 < orig for new, orig in zip(new_shape, original_shape)) + + +def _is_less_than_4_chunks(new_shape: Tuple[int, ...], chunk_shape: Tuple[int, ...]): + return numpy.prod(new_shape) < 4 * numpy.prod(chunk_shape) + + +def _get_scaled_slot_shape( + original_tagged_shape: OrderedDict[str, int], new_scaling: Dict[str, float] +) -> Tuple[int, ...]: + assert all(s > 0 for s in new_scaling.values()), f"Invalid scaling: {new_scaling}" + return tuple(int(s // new_scaling[a]) if a in new_scaling else s for a, s in original_tagged_shape.items()) def _compute_and_write_scales( - export_path: str, - image_source_slot, - progress_signal: OrderedSignal, + export_path: str, image_source_slot: Slot, progress_signal: OrderedSignal, min_length: Optional[int] ) -> List[ImageMetadata]: pc = PathComponents(export_path) external_path = pc.externalPath internal_path = pc.internalPath store = FSStore(external_path, mode="w", **OME_ZARR_V_0_4_KWARGS) chunk_shape = _get_chunk_shape(image_source_slot) - scalings = _get_scalings(image_source_slot, chunk_shape) + scalings = _get_scalings(image_source_slot.meta.getTaggedShape(), chunk_shape, min_length) meta = [] for i, scaling in enumerate(scalings): scale_path = f"{internal_path}/s{i}" if internal_path else f"s{i}" - scaled_shape = ( - int(s // scaling[a]) if a in scaling else s for a, s in image_source_slot.meta.getTaggedShape().items() - ) + scaled_shape = _get_scaled_slot_shape(image_source_slot.meta.getTaggedShape(), scaling) zarray = zarr.creation.empty( scaled_shape, store=store, path=scale_path, chunks=chunk_shape, dtype=image_source_slot.meta.dtype ) @@ -128,6 +162,7 @@ def write_ome_zarr( export_path: str, image_source_slot: Slot, progress_signal: OrderedSignal, + min_length: Optional[int] = None, ): op_reorder = OpReorderAxes(parent=image_source_slot.operator) op_reorder.AxisOrder.setValue("tczyx") @@ -135,7 +170,7 @@ def write_ome_zarr( op_reorder.Input.connect(image_source_slot) image_source = op_reorder.Output progress_signal(25) - ome_zarr_meta = _compute_and_write_scales(export_path, image_source, progress_signal) + 
ome_zarr_meta = _compute_and_write_scales(export_path, image_source, progress_signal, min_length) progress_signal(95) _write_ome_zarr_and_ilastik_metadata( export_path, diff --git a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py index d8cc0a546..e64bb7c1b 100644 --- a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py +++ b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py @@ -1,4 +1,5 @@ import math +from collections import OrderedDict from unittest import mock import numpy @@ -8,7 +9,7 @@ from lazyflow.operators import OpArrayPiper from lazyflow.utility import Timer -from lazyflow.utility.io_util.write_ome_zarr import write_ome_zarr +from lazyflow.utility.io_util.write_ome_zarr import write_ome_zarr, _get_scalings @pytest.fixture(params=["ilastik default order", "2d", "3d", "2dc", "125MiB"]) @@ -82,3 +83,54 @@ def test_metadata_integrity(tmp_path, graph, data_array): assert written_array.shape == expected_shape assert numpy.count_nonzero(written_array) > numpy.prod(expected_shape) / 2, "did not write actual data" assert all([key in discovered_keys for key in store.keys()]), "store contains undocumented subpaths" + + +@pytest.mark.parametrize( + "data_shape, expected_scalings", + [ + # Criterion: 4 x chunk size, i.e.: 4 * math.prod(4, 179, 178) -- times 8 for scaling by 2 in 3D + ((1, 1, 4, 1009, 1010), [[1.0, 1.0, 1.0, 1.0, 1.0]]), # Just under criterion to be scaled + ((1, 1, 4, 1011, 1010), [[1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 2.0, 2.0]]), # Just over criterion + ((1, 1, 1, 30, 30), [[1.0, 1.0, 1.0, 1.0, 1.0]]), # Too small + ((1, 1, 2, 4040, 4040), [[1.0, 1.0, 1.0, 1.0, 1.0]]), # No reduction to singleton or anisotropic scaling + ((1, 1, 1, 1430, 1430), [[1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 2.0, 2.0]]), # 2D scaling is fine + ], +) +def test_downscaling(tmp_path, graph, data_shape, expected_scalings): + data = vigra.VigraArray(data_shape, axistags=vigra.defaultAxistags("tczyx")) + data[...] = numpy.indices(data_shape).sum(0) + export_path = tmp_path / "test.zarr" + source_op = OpArrayPiper(graph=graph) + source_op.Input.setValue(data) + progress = mock.Mock() + with Timer() as timer: + write_ome_zarr(str(export_path), source_op.Output, progress) + duration = timer.seconds() + + # Manual benchmarking + raw_size = math.prod(data.shape) * data.dtype.type().nbytes + print(";" f"{data.shape};" f"{data.dtype};" f"{raw_size};" f"{duration};" f"{duration / raw_size};") + + store = zarr.open(str(export_path)) + meta = store.attrs["multiscales"][0] + assert len(meta["datasets"]) == len(expected_scalings) + + for i, scaling in enumerate(expected_scalings): + dataset = meta["datasets"][i] + scale_transforms = [ + transform for transform in dataset["coordinateTransformations"] if transform["type"] == "scale" + ] + assert scale_transforms[0]["scale"] == scaling + + +def test_downscaling_raises(): + # Testing at the implementation level instead of top-level write_ome_zarr for simplicity. + # Would need to set up a data array with an insane shape without actually allocating RAM for it. 
+ scaling_factor = 2 + sanity_limit = 20 + minimum_chunks_per_scale = 8 + chunk_length = 100 + insane_length = chunk_length * (scaling_factor**sanity_limit) * minimum_chunks_per_scale + insane_data_shape = OrderedDict({"t": 1, "c": 1, "z": 1, "y": 1, "x": insane_length}) + with pytest.raises(ValueError, match="Too many scales"): + _get_scalings(insane_data_shape, (1, 1, 1, 1, chunk_length), None) From 8dab2dce8f57a44cee0cb0b671fd3570fbb1f7b0 Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Tue, 27 Aug 2024 13:19:59 +0200 Subject: [PATCH 06/35] OME-Zarr export: Implement blockwise scaling * Simple reduction-downsampling for now * Scale data and roi in one function to avoid separate calculations of scaled shape that depend on each other --- lazyflow/utility/io_util/write_ome_zarr.py | 115 ++++++++++++------ .../test_io_util/test_write_ome_zarr.py | 92 ++++++++++++-- 2 files changed, 164 insertions(+), 43 deletions(-) diff --git a/lazyflow/utility/io_util/write_ome_zarr.py b/lazyflow/utility/io_util/write_ome_zarr.py index 3a8f99ba3..ea1da4e8b 100644 --- a/lazyflow/utility/io_util/write_ome_zarr.py +++ b/lazyflow/utility/io_util/write_ome_zarr.py @@ -1,5 +1,7 @@ import dataclasses import logging +import math +from collections import OrderedDict as ODict from functools import partial from typing import List, Tuple, Dict, Optional, OrderedDict @@ -16,15 +18,19 @@ logger = logging.getLogger(__name__) +Shape = Tuple[int, ...] +TaggedShape = OrderedDict[str, int] # axis: size +OrderedScaling = OrderedDict[str, float] # axis: scale + @dataclasses.dataclass class ImageMetadata: path: str - scale: Dict[str, float] + scale: OrderedScaling translation: Dict[str, float] -def _get_chunk_shape(image_source_slot: Slot) -> Tuple[int, ...]: +def _get_chunk_shape(image_source_slot: Slot) -> Shape: """Determine chunk shape for OME-Zarr storage based on image source slot. Chunk size is 1 for t and c, and determined by ilastik default rules for zyx, with a target of 512KB per chunk.""" dtype = image_source_slot.meta.dtype @@ -39,8 +45,8 @@ def _get_chunk_shape(image_source_slot: Slot) -> Tuple[int, ...]: def _get_scalings( - original_tagged_shape: OrderedDict[str, int], chunk_shape: Tuple[int, ...], min_length: Optional[int] -) -> List[Dict[str, float]]: + original_tagged_shape: TaggedShape, chunk_shape: Shape, min_length: Optional[int] +) -> List[OrderedScaling]: """ Computes scaling "factors". Technically they are divisors for the shape (factor 2.0 means half the shape). @@ -54,17 +60,19 @@ def _get_scalings( """ assert len(chunk_shape) == len(original_tagged_shape), "Chunk shape and tagged shape must have same length" spatial = ["z", "y", "x"] - original_scale = {a: 1.0 for a in original_tagged_shape.keys()} + original_scale = ODict([(a, 1.0) for a in original_tagged_shape.keys()]) scalings = [original_scale] sanity_limit = 20 for i in range(sanity_limit): if i == sanity_limit: raise ValueError(f"Too many scales computed, limit={sanity_limit}. 
Please report this to the developers.") - previous_scaling = scalings[-1] - new_scaling = { - a: s * 2.0 if a in spatial and original_tagged_shape[a] > 1 else 1.0 for a, s in previous_scaling.items() - } - new_shape = _get_scaled_slot_shape(original_tagged_shape, new_scaling) + new_scaling = ODict( + [ + (a, 2.0 ** (i + 1)) if a in spatial and original_tagged_shape[a] > 1 else (a, 1.0) + for a in original_tagged_shape.keys() + ] + ) + new_shape = _scale_tagged_shape(original_tagged_shape, new_scaling) if ( _is_less_than_4_chunks(new_shape, chunk_shape) or _reduces_any_axis_to_singleton(new_shape, tuple(original_tagged_shape.values())) @@ -75,19 +83,51 @@ def _get_scalings( return scalings -def _reduces_any_axis_to_singleton(new_shape: Tuple[int, ...], original_shape: Tuple[int, ...]): +def _reduces_any_axis_to_singleton(new_shape: Shape, original_shape: Shape): return any(new <= 1 < orig for new, orig in zip(new_shape, original_shape)) -def _is_less_than_4_chunks(new_shape: Tuple[int, ...], chunk_shape: Tuple[int, ...]): +def _is_less_than_4_chunks(new_shape: Shape, chunk_shape: Shape): return numpy.prod(new_shape) < 4 * numpy.prod(chunk_shape) -def _get_scaled_slot_shape( - original_tagged_shape: OrderedDict[str, int], new_scaling: Dict[str, float] -) -> Tuple[int, ...]: - assert all(s > 0 for s in new_scaling.values()), f"Invalid scaling: {new_scaling}" - return tuple(int(s // new_scaling[a]) if a in new_scaling else s for a, s in original_tagged_shape.items()) +def _scale_tagged_shape(original_tagged_shape: TaggedShape, scaling: OrderedScaling) -> Shape: + assert all(s > 0 for s in scaling.values()), f"Invalid scaling: {scaling}" + return tuple( + _round_like_scaling_method(s / scaling[a]) if a in scaling else s for a, s in original_tagged_shape.items() + ) + + +def _round_like_scaling_method(value: float) -> int: + """For calculating scaled shape after applying the scaling method. + Different scaling methods might round differently, so we need to match that.""" + # Currently the only rounding method is 2-step indexing of numpy array, which always rounds up + # numpy.ones(7)[::2].shape == (4,) + return math.ceil(value) + + +def _apply_scaling_method( + data: numpy.typing.NDArray, current_block_roi: Tuple[List[int], List[int]], scaling: OrderedScaling +) -> Tuple[numpy.typing.NDArray, Tuple[List[int], List[int]]]: + """Downscale data by applying scaling factors to spatial dimensions. + Ordering of `data.shape`, scaling and current_block_roi must match. + Needs to know block roi to determine position of the scaled block within the total scaled image. + Returns scaled data and scaled roi because the roi must be adjusted for blockwise rounding. + """ + scaling_int = [int(s) for s in scaling.values()] + starts = current_block_roi[0] + # Specific to downsampling, where scale = step size. + # When scale=5, the pixels that should be included in the final result are at 0, 5, 10, 15, ... + # If e.g. 
start=22 for this block, the block must internally add crop=3, + # so that it globally starts at 25: 22 + 3 = 25, where 3 = 5 - (22 % 5) + block_start_crops = [ + (scale - (start % scale)) if start % scale > 0 else 0 for start, scale in zip(starts, scaling_int) + ] + crop_and_downsample_slicing = tuple(slice(crop, None, scale) for crop, scale in zip(block_start_crops, scaling_int)) + scaled_starts = [_round_like_scaling_method(start / scale) for start, scale in zip(starts, scaling_int)] + scaled_stops = [_round_like_scaling_method(stop / scale) for stop, scale in zip(current_block_roi[1], scaling_int)] + scaled_roi = (scaled_starts, scaled_stops) + return data[crop_and_downsample_slicing], scaled_roi def _compute_and_write_scales( @@ -98,30 +138,37 @@ def _compute_and_write_scales( internal_path = pc.internalPath store = FSStore(external_path, mode="w", **OME_ZARR_V_0_4_KWARGS) chunk_shape = _get_chunk_shape(image_source_slot) + scalings = _get_scalings(image_source_slot.meta.getTaggedShape(), chunk_shape, min_length) + zarrays = [] meta = [] - for i, scaling in enumerate(scalings): scale_path = f"{internal_path}/s{i}" if internal_path else f"s{i}" - scaled_shape = _get_scaled_slot_shape(image_source_slot.meta.getTaggedShape(), scaling) - zarray = zarr.creation.empty( - scaled_shape, store=store, path=scale_path, chunks=chunk_shape, dtype=image_source_slot.meta.dtype + scaled_shape = _scale_tagged_shape(image_source_slot.meta.getTaggedShape(), scaling) + zarrays.append( + zarr.creation.empty( + scaled_shape, store=store, path=scale_path, chunks=chunk_shape, dtype=image_source_slot.meta.dtype + ) ) - - def scale_and_write_block(scale_index, scaling_, zarray_, roi, data): - if scale_index > 0: - logger.info(f"Scale {scale_index}: Applying {scaling_=} to {roi=}") - slicing = roiToSlice(*roi) - logger.info(f"Scale {scale_index}: Writing to {slicing=}: {data=}") - zarray_[slicing] = data - - requester = BigRequestStreamer(image_source_slot, roiFromShape(image_source_slot.meta.shape)) - requester.resultSignal.subscribe(partial(scale_and_write_block, i, scaling, zarray)) - requester.progressSignal.subscribe(progress_signal) - requester.execute() - meta.append(ImageMetadata(scale_path, scaling, {})) + def scale_and_write_block(scalings_, zarrays_, roi, data): + for i_, scaling_ in enumerate(scalings_): + if i_ > 0: + logger.info(f"Scale {i_}: Applying {scaling_=} to {roi=}") + scaled_data, scaled_roi = _apply_scaling_method(data, roi, scaling_) + slicing = roiToSlice(*scaled_roi) + else: + slicing = roiToSlice(*roi) + scaled_data = data + logger.info(f"Scale {i_}: Writing data with shape={scaled_data.shape} to {slicing=}") + zarrays_[i_][slicing] = scaled_data + + requester = BigRequestStreamer(image_source_slot, roiFromShape(image_source_slot.meta.shape)) + requester.resultSignal.subscribe(partial(scale_and_write_block, scalings, zarrays)) + requester.progressSignal.subscribe(progress_signal) + requester.execute() + return meta diff --git a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py index e64bb7c1b..e6b39d304 100644 --- a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py +++ b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py @@ -8,8 +8,9 @@ import zarr from lazyflow.operators import OpArrayPiper +from lazyflow.roi import roiToSlice from lazyflow.utility import Timer -from lazyflow.utility.io_util.write_ome_zarr import write_ome_zarr, _get_scalings +from 
lazyflow.utility.io_util.write_ome_zarr import write_ome_zarr, _get_scalings, _apply_scaling_method @pytest.fixture(params=["ilastik default order", "2d", "3d", "2dc", "125MiB"]) @@ -79,30 +80,68 @@ def test_metadata_integrity(tmp_path, graph, data_array): transform for transform in dataset["coordinateTransformations"] if transform["type"] == "scale" ] assert len(reported_scalings) == 1 - expected_shape = tuple(numpy.array(original_shape_reordered) // numpy.array(reported_scalings[0]["scale"])) + expected_shape = tuple( + math.ceil(orig / reported) + for orig, reported in zip(original_shape_reordered, reported_scalings[0]["scale"]) + ) assert written_array.shape == expected_shape assert numpy.count_nonzero(written_array) > numpy.prod(expected_shape) / 2, "did not write actual data" assert all([key in discovered_keys for key in store.keys()]), "store contains undocumented subpaths" @pytest.mark.parametrize( - "data_shape, expected_scalings", + "data_shape", [ # Criterion: 4 x chunk size, i.e.: 4 * math.prod(4, 179, 178) -- times 8 for scaling by 2 in 3D - ((1, 1, 4, 1009, 1010), [[1.0, 1.0, 1.0, 1.0, 1.0]]), # Just under criterion to be scaled - ((1, 1, 4, 1011, 1010), [[1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 2.0, 2.0]]), # Just over criterion - ((1, 1, 1, 30, 30), [[1.0, 1.0, 1.0, 1.0, 1.0]]), # Too small - ((1, 1, 2, 4040, 4040), [[1.0, 1.0, 1.0, 1.0, 1.0]]), # No reduction to singleton or anisotropic scaling - ((1, 1, 1, 1430, 1430), [[1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 2.0, 2.0]]), # 2D scaling is fine + (1, 1, 4, 1008, 1010), # Just under criterion to be scaled + (1, 1, 1, 30, 30), # Tiny + (1, 1, 2, 1432, 1432), # No reduction to singleton or anisotropic scaling ], ) -def test_downscaling(tmp_path, graph, data_shape, expected_scalings): +def test_writes_with_no_scaling(tmp_path, graph, data_shape): data = vigra.VigraArray(data_shape, axistags=vigra.defaultAxistags("tczyx")) data[...] = numpy.indices(data_shape).sum(0) export_path = tmp_path / "test.zarr" source_op = OpArrayPiper(graph=graph) source_op.Input.setValue(data) progress = mock.Mock() + + write_ome_zarr(str(export_path), source_op.Output, progress) + + store = zarr.open(str(export_path)) + meta = store.attrs["multiscales"][0] + assert len(meta["datasets"]) == 1 + dataset = meta["datasets"][0] + numpy.testing.assert_array_equal(store[dataset["path"]], data) + scale_transforms = [transform for transform in dataset["coordinateTransformations"] if transform["type"] == "scale"] + assert scale_transforms[0]["scale"] == [1.0, 1.0, 1.0, 1.0, 1.0] + + +@pytest.mark.parametrize( + "data_shape, computation_block_shape, expected_scalings", + [ + # Criterion: 4 x chunk size = 4 * math.prod(50, 51, 50) -- times 8 for scaling by 2 in 3D + ((1, 1, 66, 250, 250), None, [[1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 2.0, 2.0, 2.0]]), + # 2D scaling: complements the (1, 1, 2, 1432, 1432) case in test_writes_with_no_scaling. + # Ensures that the xy dimensions are sufficient to be scaled (but z=2 suppresses it). + ((1, 1, 1, 1432, 1432), None, [[1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 2.0, 2.0]]), + ( # Provoke rounding difficulties due to blockwise scaling at 4x + (1, 1, 310, 361, 371), # 371/4 = 92.75, so expected scaled shape in x is 93. 371/91 = 4 blocks + 7 pixels. + (1, 1, 310, 361, 91), # 91/4 = 22.75, so 23 per block. Plus 7/4 = 1.75, so 2. Total of 94px blockwise. 
+ [[1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 2.0, 2.0, 2.0], [1.0, 1.0, 4.0, 4.0, 4.0]], + ), + ], +) +def test_downscaling(tmp_path, graph, data_shape, computation_block_shape, expected_scalings): + data = vigra.VigraArray(data_shape, axistags=vigra.defaultAxistags("tczyx")) + data[...] = numpy.indices(data_shape).sum(0) + export_path = tmp_path / "test.zarr" + source_op = OpArrayPiper(graph=graph) + source_op.Input.setValue(data) + # max_blockshape should not affect chunk size on disc or scaling decision, + # but computations and scaling are broken up into blocks. + source_op.Output.meta.max_blockshape = computation_block_shape + progress = mock.Mock() with Timer() as timer: write_ome_zarr(str(export_path), source_op.Output, progress) duration = timer.seconds() @@ -114,6 +153,7 @@ def test_downscaling(tmp_path, graph, data_shape, expected_scalings): store = zarr.open(str(export_path)) meta = store.attrs["multiscales"][0] assert len(meta["datasets"]) == len(expected_scalings) + numpy.testing.assert_array_equal(store[meta["datasets"][0]["path"]], data) for i, scaling in enumerate(expected_scalings): dataset = meta["datasets"][i] @@ -121,6 +161,16 @@ def test_downscaling(tmp_path, graph, data_shape, expected_scalings): transform for transform in dataset["coordinateTransformations"] if transform["type"] == "scale" ] assert scale_transforms[0]["scale"] == scaling + # Makes sure that the blockwise-scaled image is identical to downscaling the data at once + if scaling == [1.0, 1.0, 4.0, 4.0, 4.0]: + downscaled_data = data[:, :, ::4, ::4, ::4] + numpy.testing.assert_array_equal(store[dataset["path"]], downscaled_data) + elif scaling == [1.0, 1.0, 2.0, 2.0, 2.0]: + downscaled_data = data[:, :, ::2, ::2, ::2] + numpy.testing.assert_array_equal(store[dataset["path"]], downscaled_data) + elif scaling == [1.0, 1.0, 1.0, 2.0, 2.0]: + downscaled_data = data[:, :, :, ::2, ::2] + numpy.testing.assert_array_equal(store[dataset["path"]], downscaled_data) def test_downscaling_raises(): @@ -134,3 +184,27 @@ def test_downscaling_raises(): insane_data_shape = OrderedDict({"t": 1, "c": 1, "z": 1, "y": 1, "x": insane_length}) with pytest.raises(ValueError, match="Too many scales"): _get_scalings(insane_data_shape, (1, 1, 1, 1, chunk_length), None) + + +def test_blockwise_downsampling_edge_cases(): + """Ensures that downsampling can handle blocks smaller than scaling step size, + and starts that are not a multiple of block size (both of which can occur in the last block + along an axis). + Also ensures that the correct pixels are sampled when starting in the middle of a step (here x), + exactly at the start of a step (here y), and when no step start is within block (0 sampled along z).""" + # Tested at implementation level because passing odd scaling and data shapes is easier this way. + data_shape = (1, 1, 15, 15, 25) + step_size = 11 + data = vigra.VigraArray(data_shape, axistags=vigra.defaultAxistags("tczyx")) + data[...] 
= numpy.indices(data_shape).sum(0) + roi = ([0, 0, 6, step_size, 24], [1, 1, 15, 15, 25]) + block = data[roiToSlice(*roi)] + expected_scaled_roi = ([0, 0, 1, 1, 3], [1, 1, 2, 2, 3]) + expected_scaled_block = data[:, :, ::step_size, ::step_size, ::step_size][roiToSlice(*expected_scaled_roi)] + scaling = OrderedDict([("t", 1), ("c", 1), ("z", step_size), ("y", step_size), ("x", step_size)]) + + scaled_block, scaled_roi = _apply_scaling_method(block, roi, scaling) + + assert scaled_block.shape == expected_scaled_block.shape + numpy.testing.assert_array_equal(scaled_block, expected_scaled_block) + assert scaled_roi == expected_scaled_roi From e85ee1f4b60bbed448b59189b5d44c918c96d89c Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Tue, 27 Aug 2024 14:12:05 +0200 Subject: [PATCH 07/35] OME-Zarr export: Name according to spec No leading /, which internalPath keeps so that `external + internal = total` path --- lazyflow/utility/io_util/write_ome_zarr.py | 4 ++-- .../test_io_util/test_write_ome_zarr.py | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/lazyflow/utility/io_util/write_ome_zarr.py b/lazyflow/utility/io_util/write_ome_zarr.py index ea1da4e8b..9bd43facf 100644 --- a/lazyflow/utility/io_util/write_ome_zarr.py +++ b/lazyflow/utility/io_util/write_ome_zarr.py @@ -135,7 +135,7 @@ def _compute_and_write_scales( ) -> List[ImageMetadata]: pc = PathComponents(export_path) external_path = pc.externalPath - internal_path = pc.internalPath + internal_path = pc.internalPath.lstrip("/") if pc.internalPath else None store = FSStore(external_path, mode="w", **OME_ZARR_V_0_4_KWARGS) chunk_shape = _get_chunk_shape(image_source_slot) @@ -177,7 +177,7 @@ def _write_ome_zarr_and_ilastik_metadata( ): pc = PathComponents(export_path) external_path = pc.externalPath - multiscale_name = pc.internalPath + multiscale_name = pc.internalPath.lstrip("/") if pc.internalPath else None ilastik_signature = {"name": "ilastik", "version": ilastik_version, "ome_zarr_exporter_version": 1} axis_types = {"t": "time", "c": "channel", "z": "space", "y": "space", "x": "space"} axes = [{"name": tag.key, "type": axis_types[tag.key]} for tag in ilastik_meta["axistags"]] diff --git a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py index e6b39d304..c9f008873 100644 --- a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py +++ b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py @@ -208,3 +208,22 @@ def test_blockwise_downsampling_edge_cases(): assert scaled_block.shape == expected_scaled_block.shape numpy.testing.assert_array_equal(scaled_block, expected_scaled_block) assert scaled_roi == expected_scaled_roi + + +def test_write_new_ome_zarr_with_name_on_disc(tmp_path, graph, data_array): + export_path = tmp_path / "test.zarr/predictions/first_attempt" + source_op = OpArrayPiper(graph=graph) + source_op.Input.setValue(data_array) + progress = mock.Mock() + write_ome_zarr(str(export_path), source_op.Output, progress) + + assert export_path.exists() + store = zarr.open(str(tmp_path / "test.zarr")) + assert "multiscales" in store.attrs + m = store.attrs["multiscales"][0] + assert all(key in m for key in ("datasets", "axes", "version", "name")) + assert m["version"] == "0.4" + assert m["name"] == "predictions/first_attempt" + assert [a["name"] for a in m["axes"]] == ["t", "c", "z", "y", "x"] + assert all(dataset["path"] in store 
for dataset in m["datasets"]) + assert all(dataset["path"][0] != "/" in store for dataset in m["datasets"]) From c2ea78d5d9c33f7fcde2463bf9d6cc4fad672dce Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Thu, 29 Aug 2024 12:32:20 +0200 Subject: [PATCH 08/35] OME-Zarr export tests: Remove benchmarking and fixture --- .../test_io_util/test_write_ome_zarr.py | 63 ++++++------------- 1 file changed, 18 insertions(+), 45 deletions(-) diff --git a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py index c9f008873..58a1ebc32 100644 --- a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py +++ b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py @@ -9,52 +9,28 @@ from lazyflow.operators import OpArrayPiper from lazyflow.roi import roiToSlice -from lazyflow.utility import Timer from lazyflow.utility.io_util.write_ome_zarr import write_ome_zarr, _get_scalings, _apply_scaling_method -@pytest.fixture(params=["ilastik default order", "2d", "3d", "2dc", "125MiB"]) -def data_array(request) -> vigra.VigraArray: - shapes = { - "ilastik default order": (1, 128, 127, 10, 1), - "2d": (256, 255), - "3d": (10, 126, 125), - "2dc": (124, 123, 3), - "125MiB": (30, 1024, 1024), # 4 bytes per pixel - "500MiB": (125, 1024, 1024), # 4 bytes per pixel - "1GiB": (256, 1024, 1024), - "3GiB": (768, 1024, 1024), - "6GiB": (768, 2048, 1024), - } - axis_order = { - "ilastik default order": "txyzc", - "2d": "yx", - "3d": "zyx", - "2dc": "yxc", - "125MiB": "zyx", - "500MiB": "zyx", - "1GiB": "zyx", - "3GiB": "zyx", - "6GiB": "zyx", - } - shape = shapes[request.param] - data = vigra.VigraArray(shape, axistags=vigra.defaultAxistags(axis_order[request.param])) - data[...] = numpy.indices(shape).sum(0) - return data - - -def test_metadata_integrity(tmp_path, graph, data_array): +@pytest.mark.parametrize( + "shape, axes", + [ + ((1, 128, 127, 10, 1), "txyzc"), # ilastik default order + ((1, 1, 3, 26, 25), "tczyx"), # OME-Zarr convention + ((256, 255), "yx"), + ((10, 126, 125), "zyx"), + ((124, 123, 3), "yxc"), + ], +) +def test_metadata_integrity(tmp_path, graph, shape, axes): + data_array = vigra.VigraArray(shape, axistags=vigra.defaultAxistags(axes)) + data_array[...] = numpy.indices(shape).sum(0) export_path = tmp_path / "test.zarr" source_op = OpArrayPiper(graph=graph) source_op.Input.setValue(data_array) progress = mock.Mock() - with Timer() as timer: - write_ome_zarr(str(export_path), source_op.Output, progress) - duration = timer.seconds() - # Manual benchmarking - raw_size = math.prod(data_array.shape) * data_array.dtype.type().nbytes - print(";" f"{data_array.shape};" f"{data_array.dtype};" f"{raw_size};" f"{duration};" f"{duration / raw_size};") + write_ome_zarr(str(export_path), source_op.Output, progress) expected_axiskeys = "tczyx" assert export_path.exists() @@ -142,13 +118,8 @@ def test_downscaling(tmp_path, graph, data_shape, computation_block_shape, expec # but computations and scaling are broken up into blocks. 
source_op.Output.meta.max_blockshape = computation_block_shape progress = mock.Mock() - with Timer() as timer: - write_ome_zarr(str(export_path), source_op.Output, progress) - duration = timer.seconds() - # Manual benchmarking - raw_size = math.prod(data.shape) * data.dtype.type().nbytes - print(";" f"{data.shape};" f"{data.dtype};" f"{raw_size};" f"{duration};" f"{duration / raw_size};") + write_ome_zarr(str(export_path), source_op.Output, progress) store = zarr.open(str(export_path)) meta = store.attrs["multiscales"][0] @@ -210,7 +181,9 @@ def test_blockwise_downsampling_edge_cases(): assert scaled_roi == expected_scaled_roi -def test_write_new_ome_zarr_with_name_on_disc(tmp_path, graph, data_array): +def test_write_new_ome_zarr_with_name_on_disc(tmp_path, graph): + data_array = vigra.VigraArray((2, 2, 5, 5, 5), axistags=vigra.defaultAxistags("tczyx")) + data_array[...] = numpy.indices((2, 2, 5, 5, 5)).sum(0) export_path = tmp_path / "test.zarr/predictions/first_attempt" source_op = OpArrayPiper(graph=graph) source_op.Input.setValue(data_array) From 1144e051b5e2575353ac839ced75218a0ff08114 Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Thu, 29 Aug 2024 16:42:35 +0200 Subject: [PATCH 09/35] OME-Zarr export: T and C should not affect scaling --- lazyflow/utility/io_util/write_ome_zarr.py | 25 +++++++++++-------- .../test_io_util/test_write_ome_zarr.py | 1 + 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/lazyflow/utility/io_util/write_ome_zarr.py b/lazyflow/utility/io_util/write_ome_zarr.py index 9bd43facf..a4b83947a 100644 --- a/lazyflow/utility/io_util/write_ome_zarr.py +++ b/lazyflow/utility/io_util/write_ome_zarr.py @@ -22,6 +22,8 @@ TaggedShape = OrderedDict[str, int] # axis: size OrderedScaling = OrderedDict[str, float] # axis: scale +SPATIAL_AXES = ["z", "y", "x"] + @dataclasses.dataclass class ImageMetadata: @@ -59,7 +61,6 @@ def _get_scalings( Raises if more than 20 scales are computed (sanity). """ assert len(chunk_shape) == len(original_tagged_shape), "Chunk shape and tagged shape must have same length" - spatial = ["z", "y", "x"] original_scale = ODict([(a, 1.0) for a in original_tagged_shape.keys()]) scalings = [original_scale] sanity_limit = 20 @@ -68,15 +69,15 @@ def _get_scalings( raise ValueError(f"Too many scales computed, limit={sanity_limit}. 
Please report this to the developers.") new_scaling = ODict( [ - (a, 2.0 ** (i + 1)) if a in spatial and original_tagged_shape[a] > 1 else (a, 1.0) + (a, 2.0 ** (i + 1)) if a in SPATIAL_AXES and original_tagged_shape[a] > 1 else (a, 1.0) for a in original_tagged_shape.keys() ] ) new_shape = _scale_tagged_shape(original_tagged_shape, new_scaling) if ( _is_less_than_4_chunks(new_shape, chunk_shape) - or _reduces_any_axis_to_singleton(new_shape, tuple(original_tagged_shape.values())) - or (min_length and max(new_shape) < min_length) + or _reduces_any_axis_to_singleton(new_shape.values(), original_tagged_shape.values()) + or (min_length and max(new_shape.values()) < min_length) ): break scalings.append(new_scaling) @@ -87,14 +88,18 @@ def _reduces_any_axis_to_singleton(new_shape: Shape, original_shape: Shape): return any(new <= 1 < orig for new, orig in zip(new_shape, original_shape)) -def _is_less_than_4_chunks(new_shape: Shape, chunk_shape: Shape): - return numpy.prod(new_shape) < 4 * numpy.prod(chunk_shape) +def _is_less_than_4_chunks(new_shape: TaggedShape, chunk_shape: Shape): + spatial_shape = [s for a, s in new_shape.items() if a in SPATIAL_AXES] + return numpy.prod(spatial_shape) < 4 * numpy.prod(chunk_shape) -def _scale_tagged_shape(original_tagged_shape: TaggedShape, scaling: OrderedScaling) -> Shape: +def _scale_tagged_shape(original_tagged_shape: TaggedShape, scaling: OrderedScaling) -> TaggedShape: assert all(s > 0 for s in scaling.values()), f"Invalid scaling: {scaling}" - return tuple( - _round_like_scaling_method(s / scaling[a]) if a in scaling else s for a, s in original_tagged_shape.items() + return ODict( + [ + (a, _round_like_scaling_method(s / scaling[a]) if a in scaling else s) + for a, s in original_tagged_shape.items() + ] ) @@ -144,7 +149,7 @@ def _compute_and_write_scales( meta = [] for i, scaling in enumerate(scalings): scale_path = f"{internal_path}/s{i}" if internal_path else f"s{i}" - scaled_shape = _scale_tagged_shape(image_source_slot.meta.getTaggedShape(), scaling) + scaled_shape = _scale_tagged_shape(image_source_slot.meta.getTaggedShape(), scaling).values() zarrays.append( zarr.creation.empty( scaled_shape, store=store, path=scale_path, chunks=chunk_shape, dtype=image_source_slot.meta.dtype diff --git a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py index 58a1ebc32..c76864186 100644 --- a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py +++ b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py @@ -72,6 +72,7 @@ def test_metadata_integrity(tmp_path, graph, shape, axes): (1, 1, 4, 1008, 1010), # Just under criterion to be scaled (1, 1, 1, 30, 30), # Tiny (1, 1, 2, 1432, 1432), # No reduction to singleton or anisotropic scaling + (3, 3, 67, 79, 97), # Big enough to scale when taking c and t into account (which we shouldn't) ], ) def test_writes_with_no_scaling(tmp_path, graph, data_shape): From cc3ffd2abea664cf2fbeb6dd0a8b9275095de702 Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Mon, 2 Sep 2024 09:57:56 +0200 Subject: [PATCH 10/35] OME-Zarr export: Overwrite existing datasets --- lazyflow/utility/io_util/write_ome_zarr.py | 5 ++++- .../test_io_util/test_write_ome_zarr.py | 20 +++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/lazyflow/utility/io_util/write_ome_zarr.py b/lazyflow/utility/io_util/write_ome_zarr.py index a4b83947a..df4f2e847 100644 --- 
a/lazyflow/utility/io_util/write_ome_zarr.py +++ b/lazyflow/utility/io_util/write_ome_zarr.py @@ -7,7 +7,7 @@ import numpy import zarr -from zarr.storage import FSStore +from zarr.storage import FSStore, contains_array from ilastik import __version__ as ilastik_version from lazyflow.operators import OpReorderAxes @@ -150,6 +150,9 @@ def _compute_and_write_scales( for i, scaling in enumerate(scalings): scale_path = f"{internal_path}/s{i}" if internal_path else f"s{i}" scaled_shape = _scale_tagged_shape(image_source_slot.meta.getTaggedShape(), scaling).values() + if contains_array(store, scale_path): + logger.warning(f"Deleting existing dataset at {external_path}/{scale_path}.") + del store[scale_path] zarrays.append( zarr.creation.empty( scaled_shape, store=store, path=scale_path, chunks=chunk_shape, dtype=image_source_slot.meta.dtype diff --git a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py index c76864186..9619443a9 100644 --- a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py +++ b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py @@ -201,3 +201,23 @@ def test_write_new_ome_zarr_with_name_on_disc(tmp_path, graph): assert [a["name"] for a in m["axes"]] == ["t", "c", "z", "y", "x"] assert all(dataset["path"] in store for dataset in m["datasets"]) assert all(dataset["path"][0] != "/" in store for dataset in m["datasets"]) + + +def test_overwrite_existing_store(tmp_path, graph): + data_array = vigra.VigraArray((2, 2, 5, 5, 5), axistags=vigra.defaultAxistags("tczyx")) + data_array[...] = numpy.indices((2, 2, 5, 5, 5)).sum(0) + data_array2 = vigra.VigraArray((1, 1, 3, 3, 3), axistags=vigra.defaultAxistags("tczyx")) + data_array2[...] 
= numpy.indices((1, 1, 3, 3, 3)).sum(0) + export_path = tmp_path / "test.zarr" + source_op = OpArrayPiper(graph=graph) + progress = mock.Mock() + source_op.Input.setValue(data_array) + write_ome_zarr(str(export_path), source_op.Output, progress) + source_op.Input.setValue(data_array2) + write_ome_zarr(str(export_path), source_op.Output, progress) + store = zarr.open(str(tmp_path / "test.zarr")) + assert "multiscales" in store.attrs + m = store.attrs["multiscales"][0] + assert "datasets" in m and "path" in m["datasets"][0] + written_data = store[m["datasets"][0]["path"]] + numpy.testing.assert_array_equal(written_data, data_array2) From 8228351579cb7a1486cb0f0eab46144a2a4efe66 Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Fri, 6 Sep 2024 12:56:15 +0200 Subject: [PATCH 11/35] OME-Zarr export: Implement option to downscale as separate format --- .../operators/ioOperators/opExportSlot.py | 39 ++++++++++++++----- lazyflow/utility/io_util/write_ome_zarr.py | 21 +++++----- .../test_io_util/test_write_ome_zarr.py | 27 ++++++------- 3 files changed, 54 insertions(+), 33 deletions(-) diff --git a/lazyflow/operators/ioOperators/opExportSlot.py b/lazyflow/operators/ioOperators/opExportSlot.py index 1a8bc9023..3d330b29f 100644 --- a/lazyflow/operators/ioOperators/opExportSlot.py +++ b/lazyflow/operators/ioOperators/opExportSlot.py @@ -89,7 +89,8 @@ class OpExportSlot(Operator): FormatInfo("compressed hdf5", "h5", 0, 5), FormatInfo("n5", "n5", 0, 5), FormatInfo("compressed n5", "n5", 0, 5), - FormatInfo("OME-Zarr", "zarr", 0, 5), + FormatInfo("single-scale OME-Zarr", "zarr", 0, 5), + FormatInfo("multi-scale OME-Zarr", "zarr", 0, 5), FormatInfo("numpy", "npy", 0, 5), FormatInfo("dvid", "", 2, 5), FormatInfo("blockwise hdf5", "json", 0, 5), @@ -106,7 +107,8 @@ def __init__(self, *args, **kwargs): export_impls["compressed hdf5"] = ("h5", partial(self._export_h5n5, True)) export_impls["n5"] = ("n5", self._export_h5n5) export_impls["compressed n5"] = ("n5", partial(self._export_h5n5, True)) - export_impls["OME-Zarr"] = ("zarr", self._export_ome_zarr) + export_impls["single-scale OME-Zarr"] = ("zarr", self._export_ome_zarr) + export_impls["multi-scale OME-Zarr"] = ("zarr", partial(self._export_ome_zarr, True)) export_impls["numpy"] = ("npy", self._export_npy) export_impls["dvid"] = ("", self._export_dvid) export_impls["blockwise hdf5"] = ("json", self._export_blockwise_hdf5) @@ -148,7 +150,14 @@ def _executeExportPath(self, result): path_format += "." 
+ file_extension # Provide the TOTAL path (including dataset name) - if self.OutputFormat.value in ("hdf5", "compressed hdf5", "n5", "compressed n5", "OME-Zarr"): + if self.OutputFormat.value in ( + "hdf5", + "compressed hdf5", + "n5", + "compressed n5", + "single-scale OME-Zarr", + "multi-scale OME-Zarr", + ): path_format += "/" + self.OutputInternalPath.value roi = numpy.array(roiFromShape(self.Input.meta.shape)) @@ -183,7 +192,16 @@ def _get_format_selection_error_msg(self, *args): output_format = self.OutputFormat.value # These cases support all combinations - if output_format in ("hdf5", "compressed hdf5", "n5", "compressed n5", "npy", "blockwise hdf5", "OME-Zarr"): + if output_format in ( + "hdf5", + "compressed hdf5", + "n5", + "compressed n5", + "npy", + "blockwise hdf5", + "single-scale OME-Zarr", + "multi-scale OME-Zarr", + ): return "" tagged_shape = self.Input.meta.getTaggedShape() @@ -396,10 +414,10 @@ def _export_multipage_tiff_sequence(self): opExport.cleanUp() self.progressSignal(100) - def _export_ome_zarr(self): + def _export_ome_zarr(self, compute_downscales: bool = False): self.progressSignal(0) try: - write_ome_zarr(self.ExportPath.value, self.Input, self.progressSignal) + write_ome_zarr(self.ExportPath.value, self.Input, self.progressSignal, compute_downscales) finally: self.progressSignal(100) @@ -447,7 +465,8 @@ class FormatValidity(object): "compressed hdf5": ALL_DTYPES, "n5": ALL_DTYPES, "compressed n5": ALL_DTYPES, - "OME-Zarr": ALL_DTYPES, + "single-scale OME-Zarr": ALL_DTYPES, + "multi-scale OME-Zarr": ALL_DTYPES, } # { extension : (min_ndim, max_ndim) } @@ -468,7 +487,8 @@ class FormatValidity(object): "compressed hdf5": (0, 5), "n5": (0, 5), "compressed n5": (0, 5), - "OME-Zarr": (0, 5), + "single-scale OME-Zarr": (0, 5), + "multi-scale OME-Zarr": (0, 5), } # { extension : [allowed_num_channels] } @@ -489,7 +509,8 @@ class FormatValidity(object): "compressed hdf5": (), # ditto "n5": (), # ditto "compressed n5": (), # ditto - "OME-Zarr": (), + "single-scale OME-Zarr": (), + "multi-scale OME-Zarr": (), } @classmethod diff --git a/lazyflow/utility/io_util/write_ome_zarr.py b/lazyflow/utility/io_util/write_ome_zarr.py index df4f2e847..6b733404d 100644 --- a/lazyflow/utility/io_util/write_ome_zarr.py +++ b/lazyflow/utility/io_util/write_ome_zarr.py @@ -3,7 +3,7 @@ import math from collections import OrderedDict as ODict from functools import partial -from typing import List, Tuple, Dict, Optional, OrderedDict +from typing import List, Tuple, Dict, OrderedDict import numpy import zarr @@ -47,7 +47,7 @@ def _get_chunk_shape(image_source_slot: Slot) -> Shape: def _get_scalings( - original_tagged_shape: TaggedShape, chunk_shape: Shape, min_length: Optional[int] + original_tagged_shape: TaggedShape, chunk_shape: Shape, compute_downscales: bool ) -> List[OrderedScaling]: """ Computes scaling "factors". @@ -55,7 +55,6 @@ def _get_scalings( Downscaling is done by a factor of 2 in all spatial dimensions until: - the dataset would be less than 4 x chunk size (2MiB) - an axis that started non-singleton would become singleton - - the largest axis would be smaller than min_length (if defined). Returns list of scaling factor dicts by axis, starting with original scale. The scaling level that meets one of the exit conditions is excluded. Raises if more than 20 scales are computed (sanity). 
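Aside: the stop conditions this docstring describes can be sketched in a few lines of standalone Python. This is only an illustration, not the code in this patch; the example tczyx shape and the zyx chunk size are assumptions borrowed from the comments in test_write_ome_zarr.py.

import math

shape = {"t": 1, "c": 1, "z": 66, "y": 250, "x": 250}  # hypothetical export shape
chunk_voxels = 50 * 51 * 50                            # assumed zyx chunk size
factors = [1.0]
for i in range(20):                                    # same sanity limit of 20 scales
    f = 2.0 ** (i + 1)
    scaled = {
        a: math.ceil(s / f) if a in "zyx" and shape[a] > 1 else s
        for a, s in shape.items()
    }
    spatial_voxels = math.prod(v for a, v in scaled.items() if a in "zyx")
    too_small = spatial_voxels < 4 * chunk_voxels      # fewer than 4 chunks of data left
    makes_singleton = any(scaled[a] <= 1 < shape[a] for a in shape)
    if too_small or makes_singleton:
        break                                          # the offending scale level is excluded
    factors.append(f)

assert factors == [1.0, 2.0]  # original scale plus one 2x downscale

For this shape, a second halving would leave roughly 17 x 63 x 63 voxels, below the four-chunk threshold, which is why only one downscale survives.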
@@ -63,6 +62,8 @@ def _get_scalings( assert len(chunk_shape) == len(original_tagged_shape), "Chunk shape and tagged shape must have same length" original_scale = ODict([(a, 1.0) for a in original_tagged_shape.keys()]) scalings = [original_scale] + if not compute_downscales: + return scalings sanity_limit = 20 for i in range(sanity_limit): if i == sanity_limit: @@ -74,10 +75,8 @@ def _get_scalings( ] ) new_shape = _scale_tagged_shape(original_tagged_shape, new_scaling) - if ( - _is_less_than_4_chunks(new_shape, chunk_shape) - or _reduces_any_axis_to_singleton(new_shape.values(), original_tagged_shape.values()) - or (min_length and max(new_shape.values()) < min_length) + if _is_less_than_4_chunks(new_shape, chunk_shape) or _reduces_any_axis_to_singleton( + new_shape.values(), original_tagged_shape.values() ): break scalings.append(new_scaling) @@ -136,7 +135,7 @@ def _apply_scaling_method( def _compute_and_write_scales( - export_path: str, image_source_slot: Slot, progress_signal: OrderedSignal, min_length: Optional[int] + export_path: str, image_source_slot: Slot, progress_signal: OrderedSignal, compute_downscales: bool ) -> List[ImageMetadata]: pc = PathComponents(export_path) external_path = pc.externalPath @@ -144,7 +143,7 @@ def _compute_and_write_scales( store = FSStore(external_path, mode="w", **OME_ZARR_V_0_4_KWARGS) chunk_shape = _get_chunk_shape(image_source_slot) - scalings = _get_scalings(image_source_slot.meta.getTaggedShape(), chunk_shape, min_length) + scalings = _get_scalings(image_source_slot.meta.getTaggedShape(), chunk_shape, compute_downscales) zarrays = [] meta = [] for i, scaling in enumerate(scalings): @@ -217,7 +216,7 @@ def write_ome_zarr( export_path: str, image_source_slot: Slot, progress_signal: OrderedSignal, - min_length: Optional[int] = None, + compute_downscales: bool = False, ): op_reorder = OpReorderAxes(parent=image_source_slot.operator) op_reorder.AxisOrder.setValue("tczyx") @@ -225,7 +224,7 @@ def write_ome_zarr( op_reorder.Input.connect(image_source_slot) image_source = op_reorder.Output progress_signal(25) - ome_zarr_meta = _compute_and_write_scales(export_path, image_source, progress_signal, min_length) + ome_zarr_meta = _compute_and_write_scales(export_path, image_source, progress_signal, compute_downscales) progress_signal(95) _write_ome_zarr_and_ilastik_metadata( export_path, diff --git a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py index 9619443a9..0e6af6ffb 100644 --- a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py +++ b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py @@ -30,7 +30,7 @@ def test_metadata_integrity(tmp_path, graph, shape, axes): source_op.Input.setValue(data_array) progress = mock.Mock() - write_ome_zarr(str(export_path), source_op.Output, progress) + write_ome_zarr(str(export_path), source_op.Output, progress, compute_downscales=True) expected_axiskeys = "tczyx" assert export_path.exists() @@ -66,16 +66,17 @@ def test_metadata_integrity(tmp_path, graph, shape, axes): @pytest.mark.parametrize( - "data_shape", + "data_shape,scaling_on", [ # Criterion: 4 x chunk size, i.e.: 4 * math.prod(4, 179, 178) -- times 8 for scaling by 2 in 3D - (1, 1, 4, 1008, 1010), # Just under criterion to be scaled - (1, 1, 1, 30, 30), # Tiny - (1, 1, 2, 1432, 1432), # No reduction to singleton or anisotropic scaling - (3, 3, 67, 79, 97), # Big enough to scale when taking c and t into account (which we shouldn't) + ((1, 
1, 4, 1008, 1010), True), # Just under criterion to be scaled + ((1, 1, 1, 30, 30), True), # Tiny + ((1, 1, 2, 1432, 1432), True), # No reduction to singleton or anisotropic scaling + ((3, 3, 67, 79, 97), True), # Big enough to scale when taking c and t into account (which we shouldn't) + ((1, 1, 4, 1432, 1432), False), # Big enough but switched off ], ) -def test_writes_with_no_scaling(tmp_path, graph, data_shape): +def test_writes_with_no_scaling(tmp_path, graph, data_shape, scaling_on): data = vigra.VigraArray(data_shape, axistags=vigra.defaultAxistags("tczyx")) data[...] = numpy.indices(data_shape).sum(0) export_path = tmp_path / "test.zarr" @@ -83,7 +84,7 @@ def test_writes_with_no_scaling(tmp_path, graph, data_shape): source_op.Input.setValue(data) progress = mock.Mock() - write_ome_zarr(str(export_path), source_op.Output, progress) + write_ome_zarr(str(export_path), source_op.Output, progress, compute_downscales=scaling_on) store = zarr.open(str(export_path)) meta = store.attrs["multiscales"][0] @@ -120,7 +121,7 @@ def test_downscaling(tmp_path, graph, data_shape, computation_block_shape, expec source_op.Output.meta.max_blockshape = computation_block_shape progress = mock.Mock() - write_ome_zarr(str(export_path), source_op.Output, progress) + write_ome_zarr(str(export_path), source_op.Output, progress, compute_downscales=True) store = zarr.open(str(export_path)) meta = store.attrs["multiscales"][0] @@ -155,7 +156,7 @@ def test_downscaling_raises(): insane_length = chunk_length * (scaling_factor**sanity_limit) * minimum_chunks_per_scale insane_data_shape = OrderedDict({"t": 1, "c": 1, "z": 1, "y": 1, "x": insane_length}) with pytest.raises(ValueError, match="Too many scales"): - _get_scalings(insane_data_shape, (1, 1, 1, 1, chunk_length), None) + _get_scalings(insane_data_shape, (1, 1, 1, 1, chunk_length), compute_downscales=True) def test_blockwise_downsampling_edge_cases(): @@ -189,7 +190,7 @@ def test_write_new_ome_zarr_with_name_on_disc(tmp_path, graph): source_op = OpArrayPiper(graph=graph) source_op.Input.setValue(data_array) progress = mock.Mock() - write_ome_zarr(str(export_path), source_op.Output, progress) + write_ome_zarr(str(export_path), source_op.Output, progress, compute_downscales=True) assert export_path.exists() store = zarr.open(str(tmp_path / "test.zarr")) @@ -212,9 +213,9 @@ def test_overwrite_existing_store(tmp_path, graph): source_op = OpArrayPiper(graph=graph) progress = mock.Mock() source_op.Input.setValue(data_array) - write_ome_zarr(str(export_path), source_op.Output, progress) + write_ome_zarr(str(export_path), source_op.Output, progress, compute_downscales=True) source_op.Input.setValue(data_array2) - write_ome_zarr(str(export_path), source_op.Output, progress) + write_ome_zarr(str(export_path), source_op.Output, progress, compute_downscales=True) store = zarr.open(str(tmp_path / "test.zarr")) assert "multiscales" in store.attrs m = store.attrs["multiscales"][0] From 19ae7945fb640f41bf8f7f55c7277542df48e29b Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Fri, 13 Sep 2024 10:39:45 +0200 Subject: [PATCH 12/35] OME-Zarr export: Revert to single-scale only Downscaling is a bit more complex, leave for another PR --- .../operators/ioOperators/opExportSlot.py | 3 +- lazyflow/utility/io_util/write_ome_zarr.py | 33 +++++-------------- .../test_io_util/test_write_ome_zarr.py | 3 ++ 3 files changed, 13 insertions(+), 26 deletions(-) diff --git a/lazyflow/operators/ioOperators/opExportSlot.py 
b/lazyflow/operators/ioOperators/opExportSlot.py index 3d330b29f..4edb38deb 100644 --- a/lazyflow/operators/ioOperators/opExportSlot.py +++ b/lazyflow/operators/ioOperators/opExportSlot.py @@ -200,7 +200,6 @@ def _get_format_selection_error_msg(self, *args): "npy", "blockwise hdf5", "single-scale OME-Zarr", - "multi-scale OME-Zarr", ): return "" @@ -415,6 +414,8 @@ def _export_multipage_tiff_sequence(self): self.progressSignal(100) def _export_ome_zarr(self, compute_downscales: bool = False): + if compute_downscales: + raise NotImplementedError() self.progressSignal(0) try: write_ome_zarr(self.ExportPath.value, self.Input, self.progressSignal, compute_downscales) diff --git a/lazyflow/utility/io_util/write_ome_zarr.py b/lazyflow/utility/io_util/write_ome_zarr.py index 6b733404d..6a502995a 100644 --- a/lazyflow/utility/io_util/write_ome_zarr.py +++ b/lazyflow/utility/io_util/write_ome_zarr.py @@ -1,6 +1,5 @@ import dataclasses import logging -import math from collections import OrderedDict as ODict from functools import partial from typing import List, Tuple, Dict, OrderedDict @@ -79,7 +78,7 @@ def _get_scalings( new_shape.values(), original_tagged_shape.values() ): break - scalings.append(new_scaling) + raise NotImplementedError("See _apply_scaling_method()") # scalings.append(new_scaling) return scalings @@ -104,34 +103,18 @@ def _scale_tagged_shape(original_tagged_shape: TaggedShape, scaling: OrderedScal def _round_like_scaling_method(value: float) -> int: """For calculating scaled shape after applying the scaling method. - Different scaling methods might round differently, so we need to match that.""" - # Currently the only rounding method is 2-step indexing of numpy array, which always rounds up - # numpy.ones(7)[::2].shape == (4,) - return math.ceil(value) + Different scaling methods round differently, so we need to match that. + E.g. scaling by stepwise downsampling like image[::2, ::2] always rounds up, + while e.g. skimage.transform.rescale rounds mathematically like standard round().""" + return int(value) def _apply_scaling_method( data: numpy.typing.NDArray, current_block_roi: Tuple[List[int], List[int]], scaling: OrderedScaling ) -> Tuple[numpy.typing.NDArray, Tuple[List[int], List[int]]]: - """Downscale data by applying scaling factors to spatial dimensions. - Ordering of `data.shape`, scaling and current_block_roi must match. - Needs to know block roi to determine position of the scaled block within the total scaled image. - Returns scaled data and scaled roi because the roi must be adjusted for blockwise rounding. - """ - scaling_int = [int(s) for s in scaling.values()] - starts = current_block_roi[0] - # Specific to downsampling, where scale = step size. - # When scale=5, the pixels that should be included in the final result are at 0, 5, 10, 15, ... - # If e.g. 
start=22 for this block, the block must internally add crop=3, - # so that it globally starts at 25: 22 + 3 = 25, where 3 = 5 - (22 % 5) - block_start_crops = [ - (scale - (start % scale)) if start % scale > 0 else 0 for start, scale in zip(starts, scaling_int) - ] - crop_and_downsample_slicing = tuple(slice(crop, None, scale) for crop, scale in zip(block_start_crops, scaling_int)) - scaled_starts = [_round_like_scaling_method(start / scale) for start, scale in zip(starts, scaling_int)] - scaled_stops = [_round_like_scaling_method(stop / scale) for stop, scale in zip(current_block_roi[1], scaling_int)] - scaled_roi = (scaled_starts, scaled_stops) - return data[crop_and_downsample_slicing], scaled_roi + """Downscaling tbd, need to investigate whether blockwise scaling is feasible. + May have to restructure the flow instead and potentially do export blockwise, then scaling afterwards.""" + raise NotImplementedError() def _compute_and_write_scales( diff --git a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py index 0e6af6ffb..eddff0f81 100644 --- a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py +++ b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py @@ -95,6 +95,7 @@ def test_writes_with_no_scaling(tmp_path, graph, data_shape, scaling_on): assert scale_transforms[0]["scale"] == [1.0, 1.0, 1.0, 1.0, 1.0] +@pytest.mark.skip("To be implemented after releasing single-scale export") @pytest.mark.parametrize( "data_shape, computation_block_shape, expected_scalings", [ @@ -146,6 +147,7 @@ def test_downscaling(tmp_path, graph, data_shape, computation_block_shape, expec numpy.testing.assert_array_equal(store[dataset["path"]], downscaled_data) +@pytest.mark.skip("To be implemented after releasing single-scale export") def test_downscaling_raises(): # Testing at the implementation level instead of top-level write_ome_zarr for simplicity. # Would need to set up a data array with an insane shape without actually allocating RAM for it. 
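A quick, self-contained illustration of the rounding remark in the revised _round_like_scaling_method docstring above; this is plain numpy, nothing here is specific to this patch:

import numpy

a = numpy.ones((7, 7))
assert a[::2, ::2].shape == (4, 4)  # stride-2 slicing keeps ceil(7 / 2) = 4 samples per axis
assert int(7 / 2) == 3              # plain truncation predicts 3, hence the need to match the scaling method's rounding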
@@ -159,6 +161,7 @@ def test_downscaling_raises(): _get_scalings(insane_data_shape, (1, 1, 1, 1, chunk_length), compute_downscales=True) +@pytest.mark.skip("To be implemented after releasing single-scale export") def test_blockwise_downsampling_edge_cases(): """Ensures that downsampling can handle blocks smaller than scaling step size, and starts that are not a multiple of block size (both of which can occur in the last block From 220164ddab65b0aab9a14914aa1e4347f7241b1a Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Fri, 13 Sep 2024 17:29:07 +0200 Subject: [PATCH 13/35] Multiscale: Maintain scale order internally, reverse in GUI Enforce highest-to-lowest so that GUI can trust that it needs to reverse the list --- .../dataSelection/datasetDetailedInfoTableModel.py | 8 +++++++- lazyflow/utility/io_util/OMEZarrStore.py | 2 -- .../utility/io_util/RESTfulPrecomputedChunkedVolume.py | 5 +---- lazyflow/utility/io_util/multiscaleStore.py | 8 ++++---- .../test_applets/dataSelection/testOpDataSelection.py | 8 ++++++-- 5 files changed, 18 insertions(+), 13 deletions(-) diff --git a/ilastik/applets/dataSelection/datasetDetailedInfoTableModel.py b/ilastik/applets/dataSelection/datasetDetailedInfoTableModel.py index 270445e5a..8788985a4 100644 --- a/ilastik/applets/dataSelection/datasetDetailedInfoTableModel.py +++ b/ilastik/applets/dataSelection/datasetDetailedInfoTableModel.py @@ -224,7 +224,13 @@ def get_scale_options(self, laneIndex) -> Dict[str, str]: datasetInfo = datasetSlot.value if not datasetInfo.scales: return {} - return {key: _dims_to_display_string(dims, datasetInfo.axiskeys) for key, dims in datasetInfo.scales.items()} + # Reverse the scale list: + # Multiscale datasets always list scales from original (largest) to most-downscaled (smallest). + # We want to display them in the opposite order. 
+ return { + key: _dims_to_display_string(dims, datasetInfo.axiskeys) + for key, dims in reversed(datasetInfo.scales.items()) + } def is_scale_locked(self, laneIndex) -> bool: datasetSlot = self._op.DatasetGroup[laneIndex][self._roleIndex] diff --git a/lazyflow/utility/io_util/OMEZarrStore.py b/lazyflow/utility/io_util/OMEZarrStore.py index 8ed92b790..1f8a237b8 100644 --- a/lazyflow/utility/io_util/OMEZarrStore.py +++ b/lazyflow/utility/io_util/OMEZarrStore.py @@ -196,8 +196,6 @@ def __init__(self, uri: str = "", single_scale_mode: bool = False): "shape": zarray.shape, } logger.info(f"Initializing scale {scale_key} took {timer.seconds()*1000} ms.") - # Reverse so that GUI displays from low to high resolution - gui_scale_metadata = OrderedDict(reversed(list(gui_scale_metadata.items()))) super().__init__( dtype=dtype, axistags=axistags, diff --git a/lazyflow/utility/io_util/RESTfulPrecomputedChunkedVolume.py b/lazyflow/utility/io_util/RESTfulPrecomputedChunkedVolume.py index 55e746610..b556c3019 100644 --- a/lazyflow/utility/io_util/RESTfulPrecomputedChunkedVolume.py +++ b/lazyflow/utility/io_util/RESTfulPrecomputedChunkedVolume.py @@ -98,10 +98,7 @@ def __init__(self, volume_url: str, n_threads=4): # Scales are ordered from original to most-downscaled in Precomputed spec lowest_resolution_key = self._json_info["scales"][-1]["key"] highest_resolution_key = self._json_info["scales"][0]["key"] - # Reverse so that the ScaleComboBox shows the options ordered from most-downscaled to original - gui_scale_metadata = OrderedDict( - [(scale["key"], scale["resolution"]) for scale in reversed(self._json_info["scales"])] - ) + gui_scale_metadata = OrderedDict([(scale["key"], scale["resolution"]) for scale in self._json_info["scales"]]) self._scales = {scale["key"]: scale for scale in self._json_info["scales"]} self.n_channels = self._json_info["num_channels"] diff --git a/lazyflow/utility/io_util/multiscaleStore.py b/lazyflow/utility/io_util/multiscaleStore.py index 98bfbd2c6..85b18cda3 100644 --- a/lazyflow/utility/io_util/multiscaleStore.py +++ b/lazyflow/utility/io_util/multiscaleStore.py @@ -52,7 +52,7 @@ def __init__( :param dtype: The dataset's numpy dtype. :param axistags: vigra.AxisTags describing the dataset's axes. :param multiscales: Dict of scale metadata for GUI/shell/project file. - Order as the scales should appear when displayed to the user. + Order from highest to lowest resolution (i.e. largest to smallest shape). Keys should be absolute identifiers for each scale as found in the dataset. Values are xyz dimensions (e.g. resolution or shape) of the image at each scale to inform user choice. :param lowest_resolution_key: Key of the lowest-resolution scale within the multiscales dict. @@ -65,9 +65,9 @@ def __init__( self.lowest_resolution_key = lowest_resolution_key self.highest_resolution_key = highest_resolution_key keys = list(self.multiscales.keys()) - assert (self.lowest_resolution_key == keys[0] and self.highest_resolution_key == keys[-1]) or ( - self.lowest_resolution_key == keys[-1] and self.highest_resolution_key == keys[0] - ), "Lowest and highest resolution keys must be at the extremes of the multiscales dict." + assert ( + self.highest_resolution_key == keys[0] and self.lowest_resolution_key == keys[-1] + ), "Multiscales dict must be ordered from highest to lowest resolution (i.e. 
largest to smallest shape)" @abstractmethod def get_shape(self, scale_key: str) -> Tuple[int]: diff --git a/tests/test_ilastik/test_applets/dataSelection/testOpDataSelection.py b/tests/test_ilastik/test_applets/dataSelection/testOpDataSelection.py index 6a30f7c28..c4a5dc085 100644 --- a/tests/test_ilastik/test_applets/dataSelection/testOpDataSelection.py +++ b/tests/test_ilastik/test_applets/dataSelection/testOpDataSelection.py @@ -874,11 +874,13 @@ def op(self, graph, monkeypatch, datasetInfo) -> OpDataSelection: return op def test_load_precomputed_chunks_over_http(self, op): + # Default scale should be lowest resolution loaded_scale0 = op.Image[:].wait() numpy.testing.assert_allclose(loaded_scale0, self.IMAGE_SCALED.reshape((1, 1, 1, 10, 12))) + # Switch to original unscaled resolution (first in the list, see multiscaleStore.multiscales) scale_keys = list(op.Image.meta.scales.keys()) - op.ActiveScale.setValue(scale_keys[1]) + op.ActiveScale.setValue(scale_keys[0]) loaded_scale1 = op.Image[:].wait() numpy.testing.assert_allclose(loaded_scale1, self.IMAGE_ORIGINAL.reshape((1, 1, 1, 20, 24))) @@ -974,11 +976,13 @@ def op(self, graph, monkeypatch, datasetInfo): return op def test_ome_zarr_loads_via_FSStore_and_ZarrArray(self, op, mock_ome_zarr_data): + # Default scale should be lowest resolution loaded_scale0 = op.Image[:].wait() numpy.testing.assert_allclose(loaded_scale0, self.IMAGE_SCALED.reshape((1, 1, 1, 10, 12))) + # Switch to original unscaled resolution (first in the list, see multiscaleStore.multiscales) scale_keys = list(op.Image.meta.scales.keys()) - op.ActiveScale.setValue(scale_keys[1]) + op.ActiveScale.setValue(scale_keys[0]) loaded_scale1 = op.Image[:].wait() numpy.testing.assert_allclose(loaded_scale1, self.IMAGE_ORIGINAL.reshape((1, 1, 1, 20, 24))) From 8df2ed7ca331d8011996fb0bb2d4db3863207de0 Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Wed, 18 Sep 2024 17:50:39 +0200 Subject: [PATCH 14/35] OME-Zarr export/Multiscale: Match output scale to input * To make this possible, `slot.meta.scales` needs to contain usable shape values, not just some hints for a gui string. * Change GUI for Precomputed to also display shape, not resolution (not sure if showing the resolution metadata was helpful anyway) --- .../datasetDetailedInfoTableModel.py | 15 +++--- .../applets/dataSelection/opDataSelection.py | 2 +- lazyflow/utility/io_util/OMEZarrStore.py | 6 +-- .../RESTfulPrecomputedChunkedVolume.py | 17 +++++-- lazyflow/utility/io_util/multiscaleStore.py | 21 +++++--- lazyflow/utility/io_util/write_ome_zarr.py | 50 +++++++++++++++++-- .../test_io_util/test_write_ome_zarr.py | 47 +++++++++++++---- 7 files changed, 121 insertions(+), 37 deletions(-) diff --git a/ilastik/applets/dataSelection/datasetDetailedInfoTableModel.py b/ilastik/applets/dataSelection/datasetDetailedInfoTableModel.py index 8788985a4..a2a372eb1 100644 --- a/ilastik/applets/dataSelection/datasetDetailedInfoTableModel.py +++ b/ilastik/applets/dataSelection/datasetDetailedInfoTableModel.py @@ -18,7 +18,7 @@ # on the ilastik web site at: # http://ilastik.org/license.html ############################################################################### -from typing import List, Dict +from typing import Dict from PyQt5.QtCore import Qt, QAbstractItemModel, QModelIndex from ilastik.utility import bind @@ -37,11 +37,10 @@ class DatasetColumn: NumColumns = 6 -def _dims_to_display_string(dimensions: List[int], axiskeys: str) -> str: - """Generate labels to put into the scale combobox. 
- Scale dimensions must be in xyz and will be reordered to match axiskeys.""" - input_axes = dict(zip("xyz", dimensions)) - reordered_dimensions = [input_axes[axis] for axis in axiskeys if axis in input_axes] +def _dims_to_display_string(dimensions: Dict[str, int], axiskeys: str) -> str: + """Generate labels to put into the scale combobox / to display in the table. + XYZ dimensions will be reordered to match axiskeys.""" + reordered_dimensions = [dimensions[axis] for axis in axiskeys if axis in "xyz"] return ", ".join(str(size) for size in reordered_dimensions) @@ -228,8 +227,8 @@ def get_scale_options(self, laneIndex) -> Dict[str, str]: # Multiscale datasets always list scales from original (largest) to most-downscaled (smallest). # We want to display them in the opposite order. return { - key: _dims_to_display_string(dims, datasetInfo.axiskeys) - for key, dims in reversed(datasetInfo.scales.items()) + key: _dims_to_display_string(tagged_shape, datasetInfo.axiskeys) + for key, tagged_shape in reversed(datasetInfo.scales.items()) } def is_scale_locked(self, laneIndex) -> bool: diff --git a/ilastik/applets/dataSelection/opDataSelection.py b/ilastik/applets/dataSelection/opDataSelection.py index 12800e5c5..30cece0f4 100644 --- a/ilastik/applets/dataSelection/opDataSelection.py +++ b/ilastik/applets/dataSelection/opDataSelection.py @@ -128,7 +128,7 @@ def __init__( self.legacy_datasetId = self.generate_id() self.working_scale = working_scale self.scale_locked = scale_locked - self.scales = OrderedDict() # {scale_key: scale_dimensions}, see MultiscaleStore.multiscales + self.scales = OrderedDict() # {scale_key: tagged_scale_shape}, see MultiscaleStore.multiscales @property def shape5d(self) -> Shape5D: diff --git a/lazyflow/utility/io_util/OMEZarrStore.py b/lazyflow/utility/io_util/OMEZarrStore.py index 1f8a237b8..0637cfdfd 100644 --- a/lazyflow/utility/io_util/OMEZarrStore.py +++ b/lazyflow/utility/io_util/OMEZarrStore.py @@ -178,7 +178,7 @@ def __init__(self, uri: str = "", single_scale_mode: bool = False): axistags = get_axistags_from_spec(multiscale_spec) datasets = multiscale_spec["datasets"] dtype = None - gui_scale_metadata = OrderedDict() # Becomes slot metadata -> must be serializable (no ZarrArray allowed) + scale_metadata = OrderedDict() # Becomes slot metadata -> must be serializable (no ZarrArray allowed) self._scale_data = {} if single_scale_mode: datasets = datasets[:1] # One scale is enough to get dtype @@ -189,7 +189,7 @@ def __init__(self, uri: str = "", single_scale_mode: bool = False): # As a bonus, this also validates all scale["path"] strings passed outside this class. 
zarray = ZarrArray(store=self._store, path=scale_key) dtype = zarray.dtype.type - gui_scale_metadata[scale_key] = list(zarray.shape[-1:-4:-1]) # xyz + scale_metadata[scale_key] = OrderedDict(zip([tag.key for tag in axistags], zarray.shape)) self._scale_data[scale_key] = { "zarray": zarray, "chunks": zarray.chunks, @@ -199,7 +199,7 @@ def __init__(self, uri: str = "", single_scale_mode: bool = False): super().__init__( dtype=dtype, axistags=axistags, - multiscales=gui_scale_metadata, + multiscales=scale_metadata, lowest_resolution_key=datasets[-1]["path"], highest_resolution_key=datasets[0]["path"], ) diff --git a/lazyflow/utility/io_util/RESTfulPrecomputedChunkedVolume.py b/lazyflow/utility/io_util/RESTfulPrecomputedChunkedVolume.py index b556c3019..b86fa42b7 100644 --- a/lazyflow/utility/io_util/RESTfulPrecomputedChunkedVolume.py +++ b/lazyflow/utility/io_util/RESTfulPrecomputedChunkedVolume.py @@ -68,7 +68,7 @@ class RESTfulPrecomputedChunkedVolume(MultiscaleStore): "type": "object", "properties": { "key": {"type": "string"}, - "resolution": {"type": "array", "items": {"type": "number"}}, + "size": {"type": "array", "items": {"type": "number"}}, }, }, }, @@ -98,11 +98,20 @@ def __init__(self, volume_url: str, n_threads=4): # Scales are ordered from original to most-downscaled in Precomputed spec lowest_resolution_key = self._json_info["scales"][-1]["key"] highest_resolution_key = self._json_info["scales"][0]["key"] - gui_scale_metadata = OrderedDict([(scale["key"], scale["resolution"]) for scale in self._json_info["scales"]]) self._scales = {scale["key"]: scale for scale in self._json_info["scales"]} self.n_channels = self._json_info["num_channels"] - - super().__init__(dtype, axistags, gui_scale_metadata, lowest_resolution_key, highest_resolution_key) + scale_shapes = [ + OrderedDict(zip("czyx", [self.n_channels] + scale["size"])) for scale in self._json_info["scales"] + ] + scale_metadata = OrderedDict(zip(self._scales.keys(), scale_shapes)) + + super().__init__( + dtype=dtype, + axistags=axistags, + multiscales=scale_metadata, + lowest_resolution_key=lowest_resolution_key, + highest_resolution_key=highest_resolution_key, + ) @staticmethod def is_uri_compatible(uri: str) -> bool: diff --git a/lazyflow/utility/io_util/multiscaleStore.py b/lazyflow/utility/io_util/multiscaleStore.py index 85b18cda3..757d708f2 100644 --- a/lazyflow/utility/io_util/multiscaleStore.py +++ b/lazyflow/utility/io_util/multiscaleStore.py @@ -20,12 +20,13 @@ ############################################################################### from abc import ABCMeta, abstractmethod from collections import OrderedDict -from typing import List, Tuple +from typing import Literal, Tuple import numpy import vigra - +# See MultiscaleStore docstring for details +Multiscales = OrderedDict[str, OrderedDict[Literal["t", "c", "z", "y", "x"], int]] DEFAULT_SCALE_KEY = "" @@ -44,17 +45,18 @@ def __init__( self, dtype: numpy.dtype, axistags: vigra.AxisTags, - multiscales: OrderedDict[str, List[int]], + multiscales: Multiscales, lowest_resolution_key: str, highest_resolution_key: str, ): """ :param dtype: The dataset's numpy dtype. :param axistags: vigra.AxisTags describing the dataset's axes. - :param multiscales: Dict of scale metadata for GUI/shell/project file. + :param multiscales: Dict of scales for GUI and OME-Zarr export, {key: tagged shape} Order from highest to lowest resolution (i.e. largest to smallest shape). - Keys should be absolute identifiers for each scale as found in the dataset. - Values are xyz dimensions (e.g. 
resolution or shape) of the image at each scale to inform user choice. + Keys: absolute identifiers for each scale as found in the dataset. + Values: tagged shape dicts ({axis: size}) of the image at each scale. + Axis order in shape dicts must match axistags. :param lowest_resolution_key: Key of the lowest-resolution scale within the multiscales dict. This acts as the default scale after load until the user selects a different one. :param highest_resolution_key: Used to infer the maximum dataset size, and for legacy HBP-mode projects. @@ -64,10 +66,13 @@ def __init__( self.multiscales = multiscales self.lowest_resolution_key = lowest_resolution_key self.highest_resolution_key = highest_resolution_key - keys = list(self.multiscales.keys()) + scale_keys = list(self.multiscales.keys()) assert ( - self.highest_resolution_key == keys[0] and self.lowest_resolution_key == keys[-1] + self.highest_resolution_key == scale_keys[0] and self.lowest_resolution_key == scale_keys[-1] ), "Multiscales dict must be ordered from highest to lowest resolution (i.e. largest to smallest shape)" + assert all( + list(scale_shape.keys()) == axistags.keys() for scale_shape in self.multiscales.values() + ), "Multiscales values must be shape dicts for the given axistags" @abstractmethod def get_shape(self, scale_key: str) -> Tuple[int]: diff --git a/lazyflow/utility/io_util/write_ome_zarr.py b/lazyflow/utility/io_util/write_ome_zarr.py index 6a502995a..08f853382 100644 --- a/lazyflow/utility/io_util/write_ome_zarr.py +++ b/lazyflow/utility/io_util/write_ome_zarr.py @@ -13,6 +13,7 @@ from lazyflow.roi import determineBlockShape, roiFromShape, roiToSlice from lazyflow.slot import Slot from lazyflow.utility import OrderedSignal, PathComponents, BigRequestStreamer +from lazyflow.utility.io_util import multiscaleStore from lazyflow.utility.io_util.OMEZarrStore import OME_ZARR_V_0_4_KWARGS logger = logging.getLogger(__name__) @@ -109,6 +110,38 @@ def _round_like_scaling_method(value: float) -> int: return int(value) +def _get_input_multiscales_matching_export_image( + image_source_slot: Slot, compute_downscales: bool +) -> multiscaleStore.Multiscales: + """Filter for multiscales entry that matches source image, plus lower scales if compute_downscales is True.""" + input_scales = image_source_slot.meta.scales + export_shape = image_source_slot.meta.getTaggedShape() + matching_scales = [] + # Multiscales is ordered from highest to lowest resolution, so start collecting once match found + match_found = False + for key, scale_shape in input_scales.items(): + if all(scale_shape[a] == export_shape[a] for a in scale_shape.keys()): + match_found = True + matching_scales.append((key, scale_shape)) + if not compute_downscales: + break + elif match_found: + matching_scales.append((key, scale_shape)) + assert len(matching_scales) > 0, "Should be impossible, input must be one of the scales" + return ODict(matching_scales) + + +def _multiscales_to_scalings(multiscales: multiscaleStore.Multiscales, base_shape: TaggedShape) -> List[OrderedScaling]: + scalings = [] + for scale_shape in multiscales.values(): + # base_shape / scale_shape: See note on scaling divisors in _get_scalings + tagged_factors = ODict( + [(a, base / s) for a, s, base in zip(scale_shape.keys(), scale_shape.values(), base_shape.values())] + ) + scalings.append(tagged_factors) + return scalings + + def _apply_scaling_method( data: numpy.typing.NDArray, current_block_roi: Tuple[List[int], List[int]], scaling: OrderedScaling ) -> Tuple[numpy.typing.NDArray, Tuple[List[int], 
List[int]]]: @@ -126,11 +159,20 @@ def _compute_and_write_scales( store = FSStore(external_path, mode="w", **OME_ZARR_V_0_4_KWARGS) chunk_shape = _get_chunk_shape(image_source_slot) - scalings = _get_scalings(image_source_slot.meta.getTaggedShape(), chunk_shape, compute_downscales) + if "scales" in image_source_slot.meta and image_source_slot.meta.scales: + # Source image is already multiscale, match its scales + input_scales = _get_input_multiscales_matching_export_image(image_source_slot, compute_downscales) + scalings = _multiscales_to_scalings(input_scales, image_source_slot.meta.getTaggedShape()) + output_scales = ODict(zip(input_scales.keys(), scalings)) + else: + # Compute new scale levels + scalings = _get_scalings(image_source_slot.meta.getTaggedShape(), chunk_shape, compute_downscales) + output_scales = ODict(zip([f"s{i}" for i in range(len(scalings))], scalings)) + zarrays = [] meta = [] - for i, scaling in enumerate(scalings): - scale_path = f"{internal_path}/s{i}" if internal_path else f"s{i}" + for scale_key, scaling in output_scales.items(): + scale_path = f"{internal_path}/{scale_key}" if internal_path else scale_key scaled_shape = _scale_tagged_shape(image_source_slot.meta.getTaggedShape(), scaling).values() if contains_array(store, scale_path): logger.warning(f"Deleting existing dataset at {external_path}/{scale_path}.") @@ -155,7 +197,7 @@ def scale_and_write_block(scalings_, zarrays_, roi, data): zarrays_[i_][slicing] = scaled_data requester = BigRequestStreamer(image_source_slot, roiFromShape(image_source_slot.meta.shape)) - requester.resultSignal.subscribe(partial(scale_and_write_block, scalings, zarrays)) + requester.resultSignal.subscribe(partial(scale_and_write_block, output_scales.values(), zarrays)) requester.progressSignal.subscribe(progress_signal) requester.execute() diff --git a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py index eddff0f81..e4c3a4a79 100644 --- a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py +++ b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py @@ -9,6 +9,7 @@ from lazyflow.operators import OpArrayPiper from lazyflow.roi import roiToSlice +from lazyflow.utility.io_util import multiscaleStore from lazyflow.utility.io_util.write_ome_zarr import write_ome_zarr, _get_scalings, _apply_scaling_method @@ -186,14 +187,19 @@ def test_blockwise_downsampling_edge_cases(): assert scaled_roi == expected_scaled_roi -def test_write_new_ome_zarr_with_name_on_disc(tmp_path, graph): +@pytest.fixture +def tiny_5d_vigra_array_piper(graph): data_array = vigra.VigraArray((2, 2, 5, 5, 5), axistags=vigra.defaultAxistags("tczyx")) data_array[...] 
= numpy.indices((2, 2, 5, 5, 5)).sum(0) + op = OpArrayPiper(graph=graph) + op.Input.setValue(data_array) + return op + + +def test_write_new_ome_zarr_with_name_on_disc(tmp_path, tiny_5d_vigra_array_piper): export_path = tmp_path / "test.zarr/predictions/first_attempt" - source_op = OpArrayPiper(graph=graph) - source_op.Input.setValue(data_array) progress = mock.Mock() - write_ome_zarr(str(export_path), source_op.Output, progress, compute_downscales=True) + write_ome_zarr(str(export_path), tiny_5d_vigra_array_piper.Output, progress, compute_downscales=True) assert export_path.exists() store = zarr.open(str(tmp_path / "test.zarr")) @@ -207,15 +213,12 @@ def test_write_new_ome_zarr_with_name_on_disc(tmp_path, graph): assert all(dataset["path"][0] != "/" in store for dataset in m["datasets"]) -def test_overwrite_existing_store(tmp_path, graph): - data_array = vigra.VigraArray((2, 2, 5, 5, 5), axistags=vigra.defaultAxistags("tczyx")) - data_array[...] = numpy.indices((2, 2, 5, 5, 5)).sum(0) +def test_overwrite_existing_store(tmp_path, tiny_5d_vigra_array_piper): data_array2 = vigra.VigraArray((1, 1, 3, 3, 3), axistags=vigra.defaultAxistags("tczyx")) data_array2[...] = numpy.indices((1, 1, 3, 3, 3)).sum(0) export_path = tmp_path / "test.zarr" - source_op = OpArrayPiper(graph=graph) + source_op = tiny_5d_vigra_array_piper progress = mock.Mock() - source_op.Input.setValue(data_array) write_ome_zarr(str(export_path), source_op.Output, progress, compute_downscales=True) source_op.Input.setValue(data_array2) write_ome_zarr(str(export_path), source_op.Output, progress, compute_downscales=True) @@ -225,3 +228,29 @@ def test_overwrite_existing_store(tmp_path, graph): assert "datasets" in m and "path" in m["datasets"][0] written_data = store[m["datasets"][0]["path"]] numpy.testing.assert_array_equal(written_data, data_array2) + + +def test_match_scale_key_to_input(tmp_path, tiny_5d_vigra_array_piper): + """If the source slot has scale metadata, the export should match the scale name to the input.""" + store_path = tmp_path / "test.zarr" + export_path = store_path / "subdir" + source_op = tiny_5d_vigra_array_piper + progress = mock.Mock() + multiscales: multiscaleStore.Multiscales = OrderedDict( + [ + ("raw_scale", OrderedDict([("t", 2), ("c", 2), ("z", 10), ("y", 10), ("x", 10)])), + ("matching_scale", OrderedDict([("t", 2), ("c", 2), ("z", 5), ("y", 5), ("x", 5)])), + ("downscale", OrderedDict([("t", 2), ("c", 2), ("z", 2), ("y", 2), ("x", 2)])), + ] + ) + source_op.Output.meta.scales = multiscales + + write_ome_zarr(str(export_path), source_op.Output, progress, compute_downscales=False) + + store = zarr.open(str(store_path)) + assert "multiscales" in store.attrs + m = store.attrs["multiscales"][0] + assert "datasets" in m and "path" in m["datasets"][0] + assert len(m["datasets"]) == 1 + assert m["datasets"][0]["path"] == "subdir/matching_scale" + assert m["datasets"][0]["coordinateTransformations"][0]["scale"] == [1.0, 1.0, 1.0, 1.0, 1.0] From f9a54656ea7a31e8398aa7c48bb223ae4d5d36cd Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Thu, 19 Sep 2024 14:36:17 +0200 Subject: [PATCH 15/35] OME-Zarr export: Fill value 0 to make output compatible with FIJI --- lazyflow/utility/io_util/write_ome_zarr.py | 2 +- .../test_utility/test_io_util/test_write_ome_zarr.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/lazyflow/utility/io_util/write_ome_zarr.py b/lazyflow/utility/io_util/write_ome_zarr.py index 08f853382..73278e444 100644 --- 
a/lazyflow/utility/io_util/write_ome_zarr.py +++ b/lazyflow/utility/io_util/write_ome_zarr.py @@ -178,7 +178,7 @@ def _compute_and_write_scales( logger.warning(f"Deleting existing dataset at {external_path}/{scale_path}.") del store[scale_path] zarrays.append( - zarr.creation.empty( + zarr.creation.zeros( scaled_shape, store=store, path=scale_path, chunks=chunk_shape, dtype=image_source_slot.meta.dtype ) ) diff --git a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py index e4c3a4a79..a1d4df536 100644 --- a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py +++ b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py @@ -50,6 +50,7 @@ def test_metadata_integrity(tmp_path, graph, shape, axes): assert dataset["path"] in store discovered_keys.append(dataset["path"]) written_array = store[dataset["path"]] + assert written_array.fill_value is not None, "FIJI and z5py don't open zarrays without a fill_value" assert "axistags" in written_array.attrs, f"no axistags for {dataset['path']}" assert vigra.AxisTags.fromJSON(written_array.attrs["axistags"]) == vigra.defaultAxistags(expected_axiskeys) assert all([value is not None for value in written_array.attrs.values()]) # Should not write None anywhere From fc25aaca18c4df56029129524bc8940088931b92 Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Thu, 19 Sep 2024 15:13:58 +0200 Subject: [PATCH 16/35] OME-Zarr: Remove internal path from scale key Two purposes: * When the export writes to this key, it does not add extra subdirs to the export * In the future if we want to correlate scales across data roles, we will probably want to match keys independent of external or internal path --- lazyflow/utility/io_util/OMEZarrStore.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lazyflow/utility/io_util/OMEZarrStore.py b/lazyflow/utility/io_util/OMEZarrStore.py index 0637cfdfd..7f43aa697 100644 --- a/lazyflow/utility/io_util/OMEZarrStore.py +++ b/lazyflow/utility/io_util/OMEZarrStore.py @@ -184,10 +184,10 @@ def __init__(self, uri: str = "", single_scale_mode: bool = False): datasets = datasets[:1] # One scale is enough to get dtype for scale in datasets: # OME-Zarr spec requires datasets ordered from high to low resolution with Timer() as timer: - scale_key = scale["path"] - # Loading a ZarrArray at this path is necessary to obtain the scale dimensions for the GUI. - # As a bonus, this also validates all scale["path"] strings passed outside this class. 
- zarray = ZarrArray(store=self._store, path=scale_key) + scale_path = scale["path"] + scale_key = scale_path.split("/")[-1] + # Loading a ZarrArray at this path is necessary to obtain the scale dimensions for the GUI + zarray = ZarrArray(store=self._store, path=scale_path) dtype = zarray.dtype.type scale_metadata[scale_key] = OrderedDict(zip([tag.key for tag in axistags], zarray.shape)) self._scale_data[scale_key] = { @@ -200,8 +200,8 @@ def __init__(self, uri: str = "", single_scale_mode: bool = False): dtype=dtype, axistags=axistags, multiscales=scale_metadata, - lowest_resolution_key=datasets[-1]["path"], - highest_resolution_key=datasets[0]["path"], + lowest_resolution_key=list(scale_metadata.keys())[-1], + highest_resolution_key=list(scale_metadata.keys())[0], ) @staticmethod From 65a615c24c6609e156f185b7365136d90caf804e Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Thu, 19 Sep 2024 17:24:20 +0200 Subject: [PATCH 17/35] OME-Zarr export: Refactor exported zarrays, use scale keys --- lazyflow/utility/io_util/write_ome_zarr.py | 36 +++++++++++----------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/lazyflow/utility/io_util/write_ome_zarr.py b/lazyflow/utility/io_util/write_ome_zarr.py index 73278e444..a675655fb 100644 --- a/lazyflow/utility/io_util/write_ome_zarr.py +++ b/lazyflow/utility/io_util/write_ome_zarr.py @@ -150,6 +150,20 @@ def _apply_scaling_method( raise NotImplementedError() +def _scale_and_write_block(scales: OrderedDict[str, OrderedScaling], zarrays: OrderedDict[str, zarr.Array], roi, data): + assert scales.keys() == zarrays.keys() + for scale_key_, scaling_ in scales.items(): + if scaling_["x"] > 1.0 or scaling_["y"] > 1.0: + logger.info(f"Scale {scale_key_}: Applying {scaling_=} to {roi=}") + scaled_data, scaled_roi = _apply_scaling_method(data, roi, scaling_) + slicing = roiToSlice(*scaled_roi) + else: + slicing = roiToSlice(*roi) + scaled_data = data + logger.info(f"Scale {scale_key_}: Writing data with shape={scaled_data.shape} to {slicing=}") + zarrays[scale_key_][slicing] = scaled_data + + def _compute_and_write_scales( export_path: str, image_source_slot: Slot, progress_signal: OrderedSignal, compute_downscales: bool ) -> List[ImageMetadata]: @@ -169,7 +183,7 @@ def _compute_and_write_scales( scalings = _get_scalings(image_source_slot.meta.getTaggedShape(), chunk_shape, compute_downscales) output_scales = ODict(zip([f"s{i}" for i in range(len(scalings))], scalings)) - zarrays = [] + zarrays = ODict() meta = [] for scale_key, scaling in output_scales.items(): scale_path = f"{internal_path}/{scale_key}" if internal_path else scale_key @@ -177,27 +191,13 @@ def _compute_and_write_scales( if contains_array(store, scale_path): logger.warning(f"Deleting existing dataset at {external_path}/{scale_path}.") del store[scale_path] - zarrays.append( - zarr.creation.zeros( - scaled_shape, store=store, path=scale_path, chunks=chunk_shape, dtype=image_source_slot.meta.dtype - ) + zarrays[scale_key] = zarr.creation.zeros( + scaled_shape, store=store, path=scale_path, chunks=chunk_shape, dtype=image_source_slot.meta.dtype ) meta.append(ImageMetadata(scale_path, scaling, {})) - def scale_and_write_block(scalings_, zarrays_, roi, data): - for i_, scaling_ in enumerate(scalings_): - if i_ > 0: - logger.info(f"Scale {i_}: Applying {scaling_=} to {roi=}") - scaled_data, scaled_roi = _apply_scaling_method(data, roi, scaling_) - slicing = roiToSlice(*scaled_roi) - else: - slicing = roiToSlice(*roi) - scaled_data = data - 
logger.info(f"Scale {i_}: Writing data with shape={scaled_data.shape} to {slicing=}") - zarrays_[i_][slicing] = scaled_data - requester = BigRequestStreamer(image_source_slot, roiFromShape(image_source_slot.meta.shape)) - requester.resultSignal.subscribe(partial(scale_and_write_block, output_scales.values(), zarrays)) + requester.resultSignal.subscribe(partial(_scale_and_write_block, output_scales, zarrays)) requester.progressSignal.subscribe(progress_signal) requester.execute() From e6210c713a221025ff3044a3ddd49996e3f1ad6e Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Fri, 20 Sep 2024 10:06:23 +0200 Subject: [PATCH 18/35] OME-Zarr export tests: Add op-level test --- .../test_ioOperators/testOpExportSlot.py | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/test_lazyflow/test_operators/test_ioOperators/testOpExportSlot.py b/tests/test_lazyflow/test_operators/test_ioOperators/testOpExportSlot.py index 25c8a083c..8e33d8385 100644 --- a/tests/test_lazyflow/test_operators/test_ioOperators/testOpExportSlot.py +++ b/tests/test_lazyflow/test_operators/test_ioOperators/testOpExportSlot.py @@ -78,6 +78,35 @@ def testBasic_Hdf5(self): finally: opRead.cleanUp() + def testBasic_ome_zarr(self): + data = numpy.random.random((90, 100)).astype(numpy.float32) + data = vigra.taggedView(data, vigra.defaultAxistags("yx")) + + graph = Graph() + opPiper = OpArrayPiper(graph=graph) + opPiper.Input.setValue(data) + + opExport = OpExportSlot(graph=graph) + opExport.Input.connect(opPiper.Output) + opExport.OutputFormat.setValue("single-scale OME-Zarr") + opExport.OutputFilenameFormat.setValue(self._tmpdir + "/test_export_x{x_start}-{x_stop}_y{y_start}-{y_stop}") + opExport.OutputInternalPath.setValue("volume/data") + opExport.CoordinateOffset.setValue((10, 20)) + + assert opExport.ExportPath.ready() + export_file = PathComponents(opExport.ExportPath.value).externalPath + assert os.path.split(export_file)[1] == "test_export_x20-120_y10-100.zarr" + opExport.run_export() + + opRead = OpInputDataReader(graph=graph) + try: + opRead.FilePath.setValue(opExport.ExportPath.value + "/s0") + expected_data = data.view(numpy.ndarray).reshape((1, 1, 1) + data.shape) # OME-Zarr always tczyx + read_data = opRead.Output[:].wait() + numpy.testing.assert_array_equal(read_data, expected_data) + finally: + opRead.cleanUp() + def testBasic_Npy(self): data = numpy.random.random((100, 100)).astype(numpy.float32) data = vigra.taggedView(data, vigra.defaultAxistags("xy")) From 5d5b7345a5cf7de9316aca06a80909df2b88a400 Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Fri, 20 Sep 2024 13:54:16 +0200 Subject: [PATCH 19/35] OME-Zarr export: Refactor Avoid passing the slot deeper than the top-level function --- lazyflow/utility/io_util/write_ome_zarr.py | 134 ++++++++++-------- .../test_io_util/test_write_ome_zarr.py | 4 +- 2 files changed, 77 insertions(+), 61 deletions(-) diff --git a/lazyflow/utility/io_util/write_ome_zarr.py b/lazyflow/utility/io_util/write_ome_zarr.py index a675655fb..bd2a388c4 100644 --- a/lazyflow/utility/io_util/write_ome_zarr.py +++ b/lazyflow/utility/io_util/write_ome_zarr.py @@ -19,8 +19,9 @@ logger = logging.getLogger(__name__) Shape = Tuple[int, ...] 
-TaggedShape = OrderedDict[str, int] # axis: size -OrderedScaling = OrderedDict[str, float] # axis: scale +TaggedShape = OrderedDict[str, int] # { axis: size } +OrderedScaling = OrderedDict[str, float] # { axis: scaling } +ScalingsByScaleKey = OrderedDict[str, OrderedScaling] # { scale_key: { axis: scaling } } SPATIAL_AXES = ["z", "y", "x"] @@ -32,21 +33,20 @@ class ImageMetadata: translation: Dict[str, float] -def _get_chunk_shape(image_source_slot: Slot) -> Shape: - """Determine chunk shape for OME-Zarr storage based on image source slot. - Chunk size is 1 for t and c, and determined by ilastik default rules for zyx, with a target of 512KB per chunk.""" - dtype = image_source_slot.meta.dtype +def _get_chunk_shape(tagged_image_shape: TaggedShape, dtype) -> Shape: + """Determine chunk shape for OME-Zarr storage. 1 for t and c, + ilastik default rules for zyx, with a target of 512KB per chunk.""" if isinstance(dtype, numpy.dtype): # Extract raw type class dtype = dtype.type dtype_bytes = dtype().nbytes - tagged_maxshape = image_source_slot.meta.getTaggedShape() + tagged_maxshape = tagged_image_shape.copy() tagged_maxshape["t"] = 1 tagged_maxshape["c"] = 1 chunk_shape = determineBlockShape(list(tagged_maxshape.values()), 512_000.0 / dtype_bytes) # 512KB chunk size return chunk_shape -def _get_scalings( +def _compute_new_scaling_factors( original_tagged_shape: TaggedShape, chunk_shape: Shape, compute_downscales: bool ) -> List[OrderedScaling]: """ @@ -110,12 +110,10 @@ def _round_like_scaling_method(value: float) -> int: return int(value) -def _get_input_multiscales_matching_export_image( - image_source_slot: Slot, compute_downscales: bool +def _get_input_multiscales_matching_export( + input_scales: multiscaleStore.Multiscales, export_shape: TaggedShape, compute_downscales: bool ) -> multiscaleStore.Multiscales: """Filter for multiscales entry that matches source image, plus lower scales if compute_downscales is True.""" - input_scales = image_source_slot.meta.scales - export_shape = image_source_slot.meta.getTaggedShape() matching_scales = [] # Multiscales is ordered from highest to lowest resolution, so start collecting once match found match_found = False @@ -131,7 +129,9 @@ def _get_input_multiscales_matching_export_image( return ODict(matching_scales) -def _multiscales_to_scalings(multiscales: multiscaleStore.Multiscales, base_shape: TaggedShape) -> List[OrderedScaling]: +def _multiscales_to_scaling_factors( + multiscales: multiscaleStore.Multiscales, base_shape: TaggedShape +) -> List[OrderedScaling]: scalings = [] for scale_shape in multiscales.values(): # base_shape / scale_shape: See note on scaling divisors in _get_scalings @@ -142,6 +142,48 @@ def _multiscales_to_scalings(multiscales: multiscaleStore.Multiscales, base_shap return scalings +def _match_or_create_scalings( + input_scales: multiscaleStore.Multiscales, export_shape: TaggedShape, chunk_shape, compute_downscales: bool +) -> ScalingsByScaleKey: + if input_scales: + # Source image is already multiscale, match its scales + filtered_input_scales = _get_input_multiscales_matching_export(input_scales, export_shape, compute_downscales) + factors = _multiscales_to_scaling_factors(filtered_input_scales, export_shape) + output_scalings = ODict(zip(filtered_input_scales.keys(), factors)) + else: + # Compute new scale levels + factors = _compute_new_scaling_factors(export_shape, chunk_shape, compute_downscales) + output_scalings = ODict(zip([f"s{i}" for i in range(len(factors))], factors)) + return output_scalings + + +def 
_create_empty_zarrays( + export_path: str, + export_dtype, + chunk_shape: Shape, + export_shape: TaggedShape, + output_scalings: ScalingsByScaleKey, +) -> Tuple[OrderedDict[str, zarr.Array], List[ImageMetadata]]: + pc = PathComponents(export_path) + external_path = pc.externalPath + internal_path = pc.internalPath.lstrip("/") if pc.internalPath else None + store = FSStore(external_path, mode="w", **OME_ZARR_V_0_4_KWARGS) + zarrays = ODict() + meta = [] + for scale_key, scaling in output_scalings.items(): + scale_path = f"{internal_path}/{scale_key}" if internal_path else scale_key + scaled_shape = _scale_tagged_shape(export_shape, scaling).values() + if contains_array(store, scale_path): + logger.warning(f"Deleting existing dataset at {external_path}/{scale_path}.") + del store[scale_path] + zarrays[scale_key] = zarr.creation.zeros( + scaled_shape, store=store, path=scale_path, chunks=chunk_shape, dtype=export_dtype + ) + meta.append(ImageMetadata(scale_path, scaling, {})) + + return zarrays, meta + + def _apply_scaling_method( data: numpy.typing.NDArray, current_block_roi: Tuple[List[int], List[int]], scaling: OrderedScaling ) -> Tuple[numpy.typing.NDArray, Tuple[List[int], List[int]]]: @@ -150,7 +192,7 @@ def _apply_scaling_method( raise NotImplementedError() -def _scale_and_write_block(scales: OrderedDict[str, OrderedScaling], zarrays: OrderedDict[str, zarr.Array], roi, data): +def _scale_and_write_block(scales: ScalingsByScaleKey, zarrays: OrderedDict[str, zarr.Array], roi, data): assert scales.keys() == zarrays.keys() for scale_key_, scaling_ in scales.items(): if scaling_["x"] > 1.0 or scaling_["y"] > 1.0: @@ -164,46 +206,6 @@ def _scale_and_write_block(scales: OrderedDict[str, OrderedScaling], zarrays: Or zarrays[scale_key_][slicing] = scaled_data -def _compute_and_write_scales( - export_path: str, image_source_slot: Slot, progress_signal: OrderedSignal, compute_downscales: bool -) -> List[ImageMetadata]: - pc = PathComponents(export_path) - external_path = pc.externalPath - internal_path = pc.internalPath.lstrip("/") if pc.internalPath else None - store = FSStore(external_path, mode="w", **OME_ZARR_V_0_4_KWARGS) - chunk_shape = _get_chunk_shape(image_source_slot) - - if "scales" in image_source_slot.meta and image_source_slot.meta.scales: - # Source image is already multiscale, match its scales - input_scales = _get_input_multiscales_matching_export_image(image_source_slot, compute_downscales) - scalings = _multiscales_to_scalings(input_scales, image_source_slot.meta.getTaggedShape()) - output_scales = ODict(zip(input_scales.keys(), scalings)) - else: - # Compute new scale levels - scalings = _get_scalings(image_source_slot.meta.getTaggedShape(), chunk_shape, compute_downscales) - output_scales = ODict(zip([f"s{i}" for i in range(len(scalings))], scalings)) - - zarrays = ODict() - meta = [] - for scale_key, scaling in output_scales.items(): - scale_path = f"{internal_path}/{scale_key}" if internal_path else scale_key - scaled_shape = _scale_tagged_shape(image_source_slot.meta.getTaggedShape(), scaling).values() - if contains_array(store, scale_path): - logger.warning(f"Deleting existing dataset at {external_path}/{scale_path}.") - del store[scale_path] - zarrays[scale_key] = zarr.creation.zeros( - scaled_shape, store=store, path=scale_path, chunks=chunk_shape, dtype=image_source_slot.meta.dtype - ) - meta.append(ImageMetadata(scale_path, scaling, {})) - - requester = BigRequestStreamer(image_source_slot, roiFromShape(image_source_slot.meta.shape)) - 
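    # Sketch of the blockwise flow around this call, for orientation: BigRequestStreamer walks the
    # full ROI in blocks, resultSignal hands each computed block to the subscribed writer, which
    # downscales the block wherever the x/y factor exceeds 1.0 and writes it into the matching
    # slice of each scale's zarr array, so the image is streamed block by block rather than
    # materialized as one array.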
requester.resultSignal.subscribe(partial(_scale_and_write_block, output_scales, zarrays)) - requester.progressSignal.subscribe(progress_signal) - requester.execute() - - return meta - - def _write_ome_zarr_and_ilastik_metadata( export_path: str, multiscale_metadata: List[ImageMetadata], ilastik_meta: Dict ): @@ -247,17 +249,31 @@ def write_ome_zarr( op_reorder.AxisOrder.setValue("tczyx") try: op_reorder.Input.connect(image_source_slot) - image_source = op_reorder.Output + reordered_source = op_reorder.Output progress_signal(25) - ome_zarr_meta = _compute_and_write_scales(export_path, image_source, progress_signal, compute_downscales) + export_shape = reordered_source.meta.getTaggedShape() + export_dtype = reordered_source.meta.dtype + input_scales = reordered_source.meta.scales if "scales" in reordered_source.meta else None + + chunk_shape = _get_chunk_shape(export_shape, export_dtype) + output_scalings = _match_or_create_scalings(input_scales, export_shape, chunk_shape, compute_downscales) + zarrays, ome_zarr_meta = _create_empty_zarrays( + export_path, export_dtype, chunk_shape, export_shape, output_scalings + ) + + requester = BigRequestStreamer(reordered_source, roiFromShape(reordered_source.meta.shape)) + requester.resultSignal.subscribe(partial(_scale_and_write_block, output_scalings, zarrays)) + requester.progressSignal.subscribe(progress_signal) + requester.execute() + progress_signal(95) _write_ome_zarr_and_ilastik_metadata( export_path, ome_zarr_meta, { - "axistags": op_reorder.Output.meta.axistags, - "display_mode": image_source_slot.meta.get("display_mode"), - "drange": image_source_slot.meta.get("drange"), + "axistags": reordered_source.meta.axistags, + "display_mode": reordered_source.meta.get("display_mode"), + "drange": reordered_source.meta.get("drange"), }, ) finally: diff --git a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py index a1d4df536..dc67cbc98 100644 --- a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py +++ b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py @@ -10,7 +10,7 @@ from lazyflow.operators import OpArrayPiper from lazyflow.roi import roiToSlice from lazyflow.utility.io_util import multiscaleStore -from lazyflow.utility.io_util.write_ome_zarr import write_ome_zarr, _get_scalings, _apply_scaling_method +from lazyflow.utility.io_util.write_ome_zarr import write_ome_zarr, _compute_new_scaling_factors, _apply_scaling_method @pytest.mark.parametrize( @@ -160,7 +160,7 @@ def test_downscaling_raises(): insane_length = chunk_length * (scaling_factor**sanity_limit) * minimum_chunks_per_scale insane_data_shape = OrderedDict({"t": 1, "c": 1, "z": 1, "y": 1, "x": insane_length}) with pytest.raises(ValueError, match="Too many scales"): - _get_scalings(insane_data_shape, (1, 1, 1, 1, chunk_length), compute_downscales=True) + _compute_new_scaling_factors(insane_data_shape, (1, 1, 1, 1, chunk_length), compute_downscales=True) @pytest.mark.skip("To be implemented after releasing single-scale export") From 465193c565aee99b3e37b3d4bb767be8b22abbac Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Tue, 24 Sep 2024 18:46:39 +0200 Subject: [PATCH 20/35] OME-Zarr rountrip/Multiscale: Carry over metadata from input when exporting This constrains reading from disc: Previously one could have opened an OME-Zarr dataset from an internal path that isn't correctly described in the metadata. 
No longer possible now that dataset metadata must be handed over for a potential export --- .../ioOperators/opOMEZarrMultiscaleReader.py | 2 + .../ioOperators/opStreamingH5N5Reader.py | 26 +- lazyflow/utility/io_util/OMEZarrStore.py | 59 ++++- lazyflow/utility/io_util/write_ome_zarr.py | 226 ++++++++++++++---- .../testOpStreamingH5N5Reader.py | 6 +- .../test_io_util/test_write_ome_zarr.py | 78 +++++- 6 files changed, 342 insertions(+), 55 deletions(-) diff --git a/lazyflow/operators/ioOperators/opOMEZarrMultiscaleReader.py b/lazyflow/operators/ioOperators/opOMEZarrMultiscaleReader.py index cd2d90ca3..4fffb3435 100644 --- a/lazyflow/operators/ioOperators/opOMEZarrMultiscaleReader.py +++ b/lazyflow/operators/ioOperators/opOMEZarrMultiscaleReader.py @@ -64,6 +64,8 @@ def setupOutputs(self): # Many public OME-Zarr datasets are chunked as full xy slices, # so orthoviews lead to downloading the entire dataset. self.Output.meta.prefer_2d = True + # Add OME-Zarr metadata to slot so that it can be ported over to an export + self.Output.meta.ome_zarr_meta = self._store.ome_meta_for_export def execute(self, slot, subindex, roi, result): scale = self.Scale.value if self.Scale.ready() and self.Scale.value else self._store.lowest_resolution_key diff --git a/lazyflow/operators/ioOperators/opStreamingH5N5Reader.py b/lazyflow/operators/ioOperators/opStreamingH5N5Reader.py index 3c5028693..d4bea51dd 100644 --- a/lazyflow/operators/ioOperators/opStreamingH5N5Reader.py +++ b/lazyflow/operators/ioOperators/opStreamingH5N5Reader.py @@ -33,7 +33,7 @@ from lazyflow.graph import Operator, InputSlot, OutputSlot from lazyflow.utility import Timer from lazyflow.utility.helpers import get_default_axisordering, bigintprod -from lazyflow.utility.io_util.OMEZarrStore import get_axistags_from_spec as get_ome_zarr_axistags +from lazyflow.utility.io_util.OMEZarrStore import get_axistags_from_spec as get_ome_zarr_axistags, OMEZarrMultiscaleMeta logger = logging.getLogger(__name__) @@ -52,12 +52,7 @@ def _find_or_infer_axistags(file: Union[h5py.File, z5py.N5File, z5py.ZarrFile], try: # Look for OME-Zarr metadata (found at store root, not in dataset) # OME-Zarr stores with more than one multiscale don't exist in public, but the spec allows it - multiscale_index = None - for i, scale in enumerate(file.attrs["multiscales"]): - if any(d.get("path", "") == internalPath.lstrip("/") for d in scale.get("datasets", [])): - multiscale_index = i - if multiscale_index is None: - raise KeyError("no spec for dataset path") + multiscale_index = _multiscale_index_for_path(file.attrs["multiscales"], internalPath) return get_ome_zarr_axistags(file.attrs["multiscales"][multiscale_index]) except KeyError as e: msg = ( @@ -79,6 +74,16 @@ def _find_or_infer_axistags(file: Union[h5py.File, z5py.N5File, z5py.ZarrFile], return vigra.defaultAxistags(str(axisorder)) +def _multiscale_index_for_path(multiscales_spec, internalPath: str): + multiscale_index = None + for i, scale in enumerate(multiscales_spec): + if any(d.get("path", "") == internalPath.lstrip("/") for d in scale.get("datasets", [])): + multiscale_index = i + if multiscale_index is None: + raise KeyError("no spec for dataset path") + return multiscale_index + + class OpStreamingH5N5Reader(Operator): """ The top-level operator for the data selection applet. 
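A quick sketch of how _multiscale_index_for_path behaves (illustrative attrs, not from a real dataset): it compares each "datasets" path against the internal path with its leading slash stripped, and raises if nothing matches.

    multiscales = [{"datasets": [{"path": "volume/s0"}, {"path": "volume/s1"}]}]
    _multiscale_index_for_path(multiscales, "/volume/s1")  # returns 0
    _multiscale_index_for_path(multiscales, "/missing")    # raises KeyError("no spec for dataset path")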
@@ -146,6 +151,13 @@ def setupOutputs(self): if chunks: self.OutputImage.meta.ideal_blockshape = chunks + if isinstance(self._h5N5File, z5py.ZarrFile): + # Add OME-Zarr metadata to slot so that it can be ported over to an export + multiscales_spec = self._h5N5File.attrs["multiscales"] + self.OutputImage.meta.ome_zarr_meta = OMEZarrMultiscaleMeta.from_multiscale_spec( + multiscales_spec[_multiscale_index_for_path(multiscales_spec, internalPath)] + ) + def execute(self, slot, subindex, roi, result): t = time.time() assert self._h5N5File is not None diff --git a/lazyflow/utility/io_util/OMEZarrStore.py b/lazyflow/utility/io_util/OMEZarrStore.py index 7f43aa697..9a2630122 100644 --- a/lazyflow/utility/io_util/OMEZarrStore.py +++ b/lazyflow/utility/io_util/OMEZarrStore.py @@ -18,12 +18,13 @@ # on the ilastik web site at: # http://ilastik.org/license.html ############################################################################### +import dataclasses import json import logging import math import os from collections import OrderedDict -from typing import Dict +from typing import Dict, List, Optional, Union, Literal from urllib.parse import unquote_to_bytes import jsonschema @@ -41,6 +42,55 @@ OME_ZARR_V_0_4_KWARGS = dict(dimension_separator="/", normalize_keys=False) OME_ZARR_V_0_1_KWARGS = dict(dimension_separator=".") +# { +# "type": "scale" OR "translation", +# "scale": List[float] OR "translation": List[float] OR "path": str +# } +OMEZarrCoordinateTransformation = Dict[str, Union[str, List[float]]] + + +def _remove_transforms_with_path( + coordinate_transformations: Optional[List[OMEZarrCoordinateTransformation]], +) -> Optional[List[OMEZarrCoordinateTransformation]]: + """ + coordinateTransformations may provide a path to transformation data in binary format, instead + of specifying floats for each axis. We don't know of any commonly used tool that writes such data, + so there is no way to know what to do with this. + """ + if coordinate_transformations is None: + return None + return [transform for transform in coordinate_transformations if "path" not in transform] + + +@dataclasses.dataclass +class OMEZarrMultiscaleMeta: + """ + Specifically for metadata that ilastik does _not_ use internally. + It is used for porting metadata from an OME-Zarr input to export. 
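    For illustration only (hypothetical values), an instance may look like:
    axiskeys=["t", "z", "y", "x"], multiscale_name="raw",
    multiscale_transformations=[{"type": "scale", "scale": [0.1, 1.0, 1.0, 1.0]}],
    dataset_transformations={"s0": [{"type": "scale", "scale": [1.0, 1.0, 1.0, 1.0]}],
    "s1": [{"type": "scale", "scale": [1.0, 2.0, 2.0, 2.0]}]}.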
+ """ + + axiskeys: List[Literal["t", "c", "z", "y", "x"]] + multiscale_name: Optional[str] + multiscale_transformations: Optional[List[OMEZarrCoordinateTransformation]] + dataset_transformations: OrderedDict[str, List[OMEZarrCoordinateTransformation]] # { scale_key: transformations } + + @classmethod + def from_multiscale_spec(cls, multiscale_spec) -> "OMEZarrMultiscaleMeta": + return cls( + axiskeys=[tag.key for tag in get_axistags_from_spec(multiscale_spec)], + multiscale_name=multiscale_spec.get("name"), + multiscale_transformations=_remove_transforms_with_path(multiscale_spec.get("coordinateTransformations")), + dataset_transformations=OrderedDict( + [ + ( + scale_key_from_path(scale["path"]), + _remove_transforms_with_path(scale.get("coordinateTransformations", [])), + ) + for scale in multiscale_spec["datasets"] + ] + ), + ) + def get_axistags_from_spec(validated_ome_spec: Dict) -> vigra.AxisTags: # We assume the spec is already `jsonschema.validate`d to be a Dict according to OME schema @@ -72,6 +122,10 @@ def _get_zarr_cache_max_size() -> int: return math.floor(caches_max * permissible_fraction_max) +def scale_key_from_path(scale_path): + return scale_path.split("/")[-1] + + class OMEZarrStore(MultiscaleStore): """ Adapter class to handle communication with a source serving a dataset in OME-Zarr format. @@ -185,7 +239,7 @@ def __init__(self, uri: str = "", single_scale_mode: bool = False): for scale in datasets: # OME-Zarr spec requires datasets ordered from high to low resolution with Timer() as timer: scale_path = scale["path"] - scale_key = scale_path.split("/")[-1] + scale_key = scale_key_from_path(scale_path) # Loading a ZarrArray at this path is necessary to obtain the scale dimensions for the GUI zarray = ZarrArray(store=self._store, path=scale_path) dtype = zarray.dtype.type @@ -196,6 +250,7 @@ def __init__(self, uri: str = "", single_scale_mode: bool = False): "shape": zarray.shape, } logger.info(f"Initializing scale {scale_key} took {timer.seconds()*1000} ms.") + self.ome_meta_for_export = OMEZarrMultiscaleMeta.from_multiscale_spec(multiscale_spec) super().__init__( dtype=dtype, axistags=axistags, diff --git a/lazyflow/utility/io_util/write_ome_zarr.py b/lazyflow/utility/io_util/write_ome_zarr.py index bd2a388c4..5ddb4f96a 100644 --- a/lazyflow/utility/io_util/write_ome_zarr.py +++ b/lazyflow/utility/io_util/write_ome_zarr.py @@ -2,7 +2,7 @@ import logging from collections import OrderedDict as ODict from functools import partial -from typing import List, Tuple, Dict, OrderedDict +from typing import List, Tuple, Dict, OrderedDict, Optional, Literal import numpy import zarr @@ -14,7 +14,11 @@ from lazyflow.slot import Slot from lazyflow.utility import OrderedSignal, PathComponents, BigRequestStreamer from lazyflow.utility.io_util import multiscaleStore -from lazyflow.utility.io_util.OMEZarrStore import OME_ZARR_V_0_4_KWARGS +from lazyflow.utility.io_util.OMEZarrStore import ( + OME_ZARR_V_0_4_KWARGS, + OMEZarrMultiscaleMeta, + OMEZarrCoordinateTransformation, +) logger = logging.getLogger(__name__) @@ -29,7 +33,7 @@ @dataclasses.dataclass class ImageMetadata: path: str - scale: OrderedScaling + scaling: OrderedScaling translation: Dict[str, float] @@ -118,7 +122,7 @@ def _get_input_multiscales_matching_export( # Multiscales is ordered from highest to lowest resolution, so start collecting once match found match_found = False for key, scale_shape in input_scales.items(): - if all(scale_shape[a] == export_shape[a] for a in scale_shape.keys()): + if all(scale_shape[a] == 
export_shape[a] or a == "c" for a in scale_shape.keys()): match_found = True matching_scales.append((key, scale_shape)) if not compute_downscales: @@ -129,32 +133,51 @@ def _get_input_multiscales_matching_export( return ODict(matching_scales) -def _multiscales_to_scaling_factors( - multiscales: multiscaleStore.Multiscales, base_shape: TaggedShape +def _scale_shapes_to_factors( + multiscales: multiscaleStore.Multiscales, + base_shape: TaggedShape, + axiskeys_to_match: List[Literal["t", "c", "z", "y", "x"]], ) -> List[OrderedScaling]: + """Input multiscales may have arbitrary axes. + Output are scaling factors relative to base_shape, with axes axiskeys_to_match. + Scale factor 1.0 for axes not present in scale_shape, and for channel.""" scalings = [] for scale_shape in multiscales.values(): - # base_shape / scale_shape: See note on scaling divisors in _get_scalings - tagged_factors = ODict( - [(a, base / s) for a, s, base in zip(scale_shape.keys(), scale_shape.values(), base_shape.values())] + filtered_base_values = [s for a, s in base_shape.items() if a in scale_shape] + # base_shape / scale_shape: See note on scaling "factors" in _compute_new_scaling_factors + relative_factors = { + a: base / s for a, s, base in zip(scale_shape.keys(), scale_shape.values(), filtered_base_values) + } + axes_matched_factors = ODict( + [(a, relative_factors[a] if a in relative_factors and a != "c" else 1.0) for a in axiskeys_to_match] ) - scalings.append(tagged_factors) + scalings.append(axes_matched_factors) return scalings def _match_or_create_scalings( input_scales: multiscaleStore.Multiscales, export_shape: TaggedShape, chunk_shape, compute_downscales: bool -) -> ScalingsByScaleKey: +) -> Tuple[ScalingsByScaleKey, Optional[ScalingsByScaleKey]]: + """ + Determine scale keys and scaling factors for export. + The second optional return value are the input's scaling factors relative to its raw scale + (needed to provide correct metadata for the exported scale(s), which may exclude the original raw). 
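    Example with illustrative shapes: for an input whose scales are raw=(z=15, y=15, x=15) and
    matching=(z=5, y=5, x=5), and an export at the matching scale, the first mapping holds factors
    relative to the export shape ({"matching": {..., "z": 1.0, "y": 1.0, "x": 1.0}}) and the second
    holds factors relative to the input's raw scale ({"matching": {..., "z": 3.0, "y": 3.0, "x": 3.0}}).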
+ """ if input_scales: # Source image is already multiscale, match its scales filtered_input_scales = _get_input_multiscales_matching_export(input_scales, export_shape, compute_downscales) - factors = _multiscales_to_scaling_factors(filtered_input_scales, export_shape) - output_scalings = ODict(zip(filtered_input_scales.keys(), factors)) + factors_relative_to_export = _scale_shapes_to_factors(filtered_input_scales, export_shape, export_shape.keys()) + scalings_relative_to_export = ODict(zip(filtered_input_scales.keys(), factors_relative_to_export)) + # Factors relative to raw scale are used later to provide correct scaling metadata + raw_shape = next(iter(input_scales.values())) + factors_relative_to_raw = _scale_shapes_to_factors(filtered_input_scales, raw_shape, export_shape.keys()) + scalings_relative_to_raw = ODict(zip(filtered_input_scales.keys(), factors_relative_to_raw)) else: # Compute new scale levels factors = _compute_new_scaling_factors(export_shape, chunk_shape, compute_downscales) - output_scalings = ODict(zip([f"s{i}" for i in range(len(factors))], factors)) - return output_scalings + scalings_relative_to_export = ODict(zip([f"s{i}" for i in range(len(factors))], factors)) + scalings_relative_to_raw = None + return scalings_relative_to_export, scalings_relative_to_raw def _create_empty_zarrays( @@ -163,13 +186,13 @@ def _create_empty_zarrays( chunk_shape: Shape, export_shape: TaggedShape, output_scalings: ScalingsByScaleKey, -) -> Tuple[OrderedDict[str, zarr.Array], List[ImageMetadata]]: +) -> Tuple[OrderedDict[str, zarr.Array], OrderedDict[str, ImageMetadata]]: pc = PathComponents(export_path) external_path = pc.externalPath internal_path = pc.internalPath.lstrip("/") if pc.internalPath else None store = FSStore(external_path, mode="w", **OME_ZARR_V_0_4_KWARGS) zarrays = ODict() - meta = [] + meta = ODict() for scale_key, scaling in output_scalings.items(): scale_path = f"{internal_path}/{scale_key}" if internal_path else scale_key scaled_shape = _scale_tagged_shape(export_shape, scaling).values() @@ -179,7 +202,7 @@ def _create_empty_zarrays( zarrays[scale_key] = zarr.creation.zeros( scaled_shape, store=store, path=scale_path, chunks=chunk_shape, dtype=export_dtype ) - meta.append(ImageMetadata(scale_path, scaling, {})) + meta[scale_key] = ImageMetadata(scale_path, scaling, {}) return zarrays, meta @@ -206,37 +229,155 @@ def _scale_and_write_block(scales: ScalingsByScaleKey, zarrays: OrderedDict[str, zarrays[scale_key_][slicing] = scaled_data +def _get_input_raw_absolute_scaling(input_ome_meta: Optional[OMEZarrMultiscaleMeta]) -> Optional[OrderedScaling]: + input_scale = None + if input_ome_meta: + raw_transforms = next(iter(input_ome_meta.dataset_transformations.values())) + # Spec requires that if any, first must be scale + if len(raw_transforms) > 0: + input_scale = raw_transforms[0] + if input_scale is None or "scale" not in input_scale: + return None + return ODict(zip(input_ome_meta.axiskeys, input_scale["scale"])) + + +def _get_input_dataset_transformations( + input_ome_meta: Optional[OMEZarrMultiscaleMeta], scale_key: str +) -> Tuple[Optional[OMEZarrCoordinateTransformation], Optional[OMEZarrCoordinateTransformation]]: + input_scale = None + input_translation = None + if input_ome_meta and input_ome_meta.dataset_transformations.get(scale_key): + input_transforms = input_ome_meta.dataset_transformations[scale_key] + # Spec requires that if any, first must be scale, second may be translation + if len(input_transforms) > 0: + input_scale = input_transforms[0] + if 
len(input_transforms) > 1: + input_translation = input_transforms[1] + return input_scale, input_translation + + +def _update_export_scaling_from_input( + absolute_scaling: OrderedScaling, + input_ome_meta: Optional[OMEZarrMultiscaleMeta], + input_scale: Optional[OMEZarrCoordinateTransformation], + scale_key: str, +) -> OrderedScaling: + if not input_scale or "scale" not in input_scale: + return absolute_scaling + input_scaling = ODict(zip(input_ome_meta.axiskeys, input_scale["scale"])) + if any([input_scaling[a] != absolute_scaling[a] for a in SPATIAL_AXES if a in input_scaling]): + # This shouldn't happen + logger.warning( + "The scaling level of the exported OME-Zarr dataset was supposed to be " + f"matched to the input dataset, but the scaling factors differ at scale {scale_key}. " + "Your exported images should be fine, but their metadata (pixel resolution) may be incorrect. " + "Please report this to the ilastik team. " + ) + # The only scale to actually update is time, if it exists in the input + updated_scaling = absolute_scaling.copy() + if "t" in input_scaling.keys() and "t" in absolute_scaling.keys(): + updated_scaling["t"] = input_scaling["t"] + return updated_scaling + + +def _make_absolute_if_possible(relative_scaling: OrderedScaling, raw_data_abs_scale: Optional[OrderedScaling]): + if not raw_data_abs_scale: + return relative_scaling + items_per_axis = [ + (a, s * raw_data_abs_scale[a]) if a in raw_data_abs_scale and a != "c" else (a, s) + for a, s in relative_scaling.items() + ] + return ODict(items_per_axis) + + +def _write_to_dataset_attrs(ilastik_meta: Dict, za: zarr.Array): + za.attrs["axistags"] = ilastik_meta["axistags"].toJSON() + if ilastik_meta["display_mode"]: + za.attrs["display_mode"] = ilastik_meta["display_mode"] + if ilastik_meta["drange"]: + za.attrs["drange"] = ilastik_meta["drange"] + + +def _get_datasets_meta( + multiscale_metadata: OrderedDict[str, ImageMetadata], + input_ome_meta: Optional[OMEZarrMultiscaleMeta], + scalings_relative_to_raw_input: Optional[ScalingsByScaleKey], +): + """ + Dataset metadata consists of (1) path, (2) coordinate transformations (scale and translation). + By default, scale is just pixel resolution relative to export, i.e. 1.0, 2.0, 4.0 etc. along each scaled axis. + This gets more complex when the source dataset was multiscale (providing `scalings_relative_to_raw_input`), + or OME-Zarr (providing `input_ome_meta`). + """ + datasets = [] + raw_data_abs_scale = _get_input_raw_absolute_scaling(input_ome_meta) + for scale_key, image in multiscale_metadata.items(): + if scalings_relative_to_raw_input and scale_key in scalings_relative_to_raw_input: + relative_scaling = scalings_relative_to_raw_input[scale_key] + else: + relative_scaling = image.scaling + # The scaling factors are relative to export or raw data shape now, + # but the input dataset might contain absolute scale values, i.e. 
time/pixel resolution + absolute_scaling = _make_absolute_if_possible(relative_scaling, raw_data_abs_scale) + input_scale, input_translation = _get_input_dataset_transformations(input_ome_meta, scale_key) + absolute_scaling = _update_export_scaling_from_input(absolute_scaling, input_ome_meta, input_scale, scale_key) + dataset = { + "path": image.path, + "coordinateTransformations": [{"type": "scale", "scale": list(absolute_scaling.values())}], + } + if input_translation and "translation" in input_translation: + tagged_translation = ODict(zip(input_ome_meta.axiskeys, input_translation["translation"])) + reordered_translation = [ + tagged_translation[a] if a in tagged_translation and a != "c" else 0.0 for a in image.scaling.keys() + ] + dataset["coordinateTransformations"].append({"type": "translation", "translation": reordered_translation}) + datasets.append(dataset) + return datasets + + def _write_ome_zarr_and_ilastik_metadata( - export_path: str, multiscale_metadata: List[ImageMetadata], ilastik_meta: Dict + export_path: str, + export_meta: OrderedDict[str, ImageMetadata], + scalings_relative_to_raw_input: Optional[ScalingsByScaleKey], + input_ome_meta: Optional[OMEZarrMultiscaleMeta], + ilastik_meta: Dict, ): pc = PathComponents(export_path) external_path = pc.externalPath multiscale_name = pc.internalPath.lstrip("/") if pc.internalPath else None ilastik_signature = {"name": "ilastik", "version": ilastik_version, "ome_zarr_exporter_version": 1} axis_types = {"t": "time", "c": "channel", "z": "space", "y": "space", "x": "space"} - axes = [{"name": tag.key, "type": axis_types[tag.key]} for tag in ilastik_meta["axistags"]] - datasets = [ - { - "path": image.path, - "coordinateTransformations": [ - {"type": "scale", "scale": [image.scale[tag.key] for tag in ilastik_meta["axistags"]]} - ], - } - for image in multiscale_metadata - ] + export_axiskeys = [tag.key for tag in ilastik_meta["axistags"]] + + axes = [{"name": a, "type": axis_types[a]} for a in export_axiskeys] + datasets = _get_datasets_meta(export_meta, input_ome_meta, scalings_relative_to_raw_input) ome_zarr_multiscale_meta = {"_creator": ilastik_signature, "version": "0.4", "axes": axes, "datasets": datasets} + + # Optional fields if multiscale_name: ome_zarr_multiscale_meta["name"] = multiscale_name + if input_ome_meta and input_ome_meta.multiscale_transformations: + transforms_axis_matched = [] + for transform in input_ome_meta.multiscale_transformations: + transform_type = transform["type"] + tagged_transform = ODict(zip(input_ome_meta.axiskeys, transform[transform_type])) + default_value = 0.0 if transform_type == "translation" else 1.0 + transforms_axis_matched.append( + { + "type": transform_type, + transform_type: [ + tagged_transform[a] if a in tagged_transform else default_value for a in export_axiskeys + ], + } + ) + ome_zarr_multiscale_meta["coordinateTransformations"] = transforms_axis_matched + store = FSStore(external_path, mode="w", **OME_ZARR_V_0_4_KWARGS) root = zarr.group(store, overwrite=False) root.attrs["multiscales"] = [ome_zarr_multiscale_meta] - for image in multiscale_metadata: + for image in export_meta.values(): za = zarr.Array(store, path=image.path) - za.attrs["axistags"] = ilastik_meta["axistags"].toJSON() - if ilastik_meta["display_mode"]: - za.attrs["display_mode"] = ilastik_meta["display_mode"] - if ilastik_meta["drange"]: - za.attrs["drange"] = ilastik_meta["drange"] + _write_to_dataset_attrs(ilastik_meta, za) def write_ome_zarr( @@ -254,22 +395,27 @@ def write_ome_zarr( export_shape = 
reordered_source.meta.getTaggedShape() export_dtype = reordered_source.meta.dtype input_scales = reordered_source.meta.scales if "scales" in reordered_source.meta else None + input_ome_meta = reordered_source.meta.get("ome_zarr_meta") chunk_shape = _get_chunk_shape(export_shape, export_dtype) - output_scalings = _match_or_create_scalings(input_scales, export_shape, chunk_shape, compute_downscales) - zarrays, ome_zarr_meta = _create_empty_zarrays( - export_path, export_dtype, chunk_shape, export_shape, output_scalings + export_scalings, scalings_relative_to_raw_input = _match_or_create_scalings( + input_scales, export_shape, chunk_shape, compute_downscales + ) + zarrays, export_meta = _create_empty_zarrays( + export_path, export_dtype, chunk_shape, export_shape, export_scalings ) requester = BigRequestStreamer(reordered_source, roiFromShape(reordered_source.meta.shape)) - requester.resultSignal.subscribe(partial(_scale_and_write_block, output_scalings, zarrays)) + requester.resultSignal.subscribe(partial(_scale_and_write_block, export_scalings, zarrays)) requester.progressSignal.subscribe(progress_signal) requester.execute() progress_signal(95) _write_ome_zarr_and_ilastik_metadata( export_path, - ome_zarr_meta, + export_meta, + scalings_relative_to_raw_input, + input_ome_meta, { "axistags": reordered_source.meta.axistags, "display_mode": reordered_source.meta.get("display_mode"), diff --git a/tests/test_lazyflow/test_operators/test_ioOperators/testOpStreamingH5N5Reader.py b/tests/test_lazyflow/test_operators/test_ioOperators/testOpStreamingH5N5Reader.py index 53b81202d..60b03d260 100644 --- a/tests/test_lazyflow/test_operators/test_ioOperators/testOpStreamingH5N5Reader.py +++ b/tests/test_lazyflow/test_operators/test_ioOperators/testOpStreamingH5N5Reader.py @@ -63,11 +63,11 @@ def test_reader_loads_data_with_axistags(graph, h5n5_file, data): vigra.AxisInfo("c", vigra.AxisType.Channels), vigra.AxisInfo("t", vigra.AxisType.Time), ) - h5n5_file.create_group("volume").create_dataset("tagged_data", data=data) - h5n5_file["volume/tagged_data"].attrs["axistags"] = axistags.toJSON() + h5n5_file.create_group("volume").create_dataset("data", data=data) + h5n5_file["volume/data"].attrs["axistags"] = axistags.toJSON() op = OpStreamingH5N5Reader(graph=graph) op.H5N5File.setValue(h5n5_file) - op.InternalPath.setValue("volume/tagged_data") + op.InternalPath.setValue("volume/data") assert op.OutputImage.meta.shape == data.shape assert op.OutputImage.meta.axistags == axistags diff --git a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py index dc67cbc98..56fdcad5e 100644 --- a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py +++ b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py @@ -10,6 +10,7 @@ from lazyflow.operators import OpArrayPiper from lazyflow.roi import roiToSlice from lazyflow.utility.io_util import multiscaleStore +from lazyflow.utility.io_util.OMEZarrStore import OMEZarrMultiscaleMeta from lazyflow.utility.io_util.write_ome_zarr import write_ome_zarr, _compute_new_scaling_factors, _apply_scaling_method @@ -231,8 +232,42 @@ def test_overwrite_existing_store(tmp_path, tiny_5d_vigra_array_piper): numpy.testing.assert_array_equal(written_data, data_array2) -def test_match_scale_key_to_input(tmp_path, tiny_5d_vigra_array_piper): - """If the source slot has scale metadata, the export should match the scale name to the input.""" +def 
test_match_input_scale_key_and_factors(tmp_path, tiny_5d_vigra_array_piper): + """If the source slot has scale metadata, the export should match the scale name to the input. + Scaling metadata should be relative to the input's raw data.""" + store_path = tmp_path / "test.zarr" + export_path = store_path / "subdir" + source_op = tiny_5d_vigra_array_piper + progress = mock.Mock() + input_axes = ["t", "z", "y", "x"] + multiscales: multiscaleStore.Multiscales = OrderedDict( + [ + ("raw_scale", OrderedDict(zip(input_axes, (2, 15, 15, 15)))), + ("matching_scale", OrderedDict(zip(input_axes, (2, 5, 5, 5)))), + ("downscale", OrderedDict(zip(input_axes, (2, 2, 2, 2)))), + ] + ) + source_op.Output.meta.scales = multiscales + # Scale metadata should be relative to raw scale, even if the export was not scaled + # Exported array is 5d, so 5 scaling entries expected even though source multiscales to match are 4d + expected_matching_scale_transform = [{"type": "scale", "scale": [1.0, 1.0, 3.0, 3.0, 3.0]}] + + write_ome_zarr(str(export_path), source_op.Output, progress, compute_downscales=False) + + store = zarr.open(str(store_path)) + assert "multiscales" in store.attrs + m = store.attrs["multiscales"][0] + assert "datasets" in m and "path" in m["datasets"][0] + assert len(m["datasets"]) == 1 + assert m["datasets"][0]["path"] == "subdir/matching_scale" + assert m["datasets"][0]["coordinateTransformations"] == expected_matching_scale_transform + + +def test_port_ome_zarr_metadata_from_input(tmp_path, tiny_5d_vigra_array_piper): + """If the source slot has scale metadata, the export should match the scale name to the input. + If there is OME-Zarr specific additional metadata (even unused in ilastik), + the export should write metadata that describe the pyramid as a whole, and those that + describe the written scale.""" store_path = tmp_path / "test.zarr" export_path = store_path / "subdir" source_op = tiny_5d_vigra_array_piper @@ -245,6 +280,41 @@ def test_match_scale_key_to_input(tmp_path, tiny_5d_vigra_array_piper): ] ) source_op.Output.meta.scales = multiscales + expected_multiscale_transform = [{"type": "scale", "scale": [0.1, 1.0, 1.0, 1.0, 1.0]}] + expected_matching_scale_transform = [ + {"type": "scale", "scale": [1.0, 1.0, 2.0, 2.0, 2.0]}, + {"type": "translation", "translation": [0.1, 0.0, 3.2, 1.0, 1.0]}, + ] + source_op.Output.meta.ome_zarr_meta = OMEZarrMultiscaleMeta.from_multiscale_spec( + { + "name": "wonderful_pyramid", + "axes": ["t", "z", "y", "x"], # Input metadata tzyx, but e.g. 
Probabilities output would be tczyx + "coordinateTransformations": [{"type": "scale", "scale": [0.1, 1.0, 1.0, 1.0]}], + "datasets": [ + { + "path": "raw_scale", + "coordinateTransformations": [ + {"type": "scale", "scale": [1.0, 1.0, 1.0, 1.0]}, + {"type": "translation", "translation": [0.1, 5.0, 2.0, 1.0]}, + ], + }, + { + "path": "matching_scale", + "coordinateTransformations": [ + {"type": "scale", "scale": [1.0, 2.0, 2.0, 2.0]}, + {"type": "translation", "translation": [0.1, 3.2, 1.0, 1.0]}, + ], + }, + { + "path": "downscale", + "coordinateTransformations": [ + {"type": "scale", "scale": [1.0, 4.0, 4.0, 4.0]}, + {"type": "translation", "translation": [5.1, 3.5, 5.4, 1.0]}, + ], + }, + ], + } + ) write_ome_zarr(str(export_path), source_op.Output, progress, compute_downscales=False) @@ -253,5 +323,7 @@ def test_match_scale_key_to_input(tmp_path, tiny_5d_vigra_array_piper): m = store.attrs["multiscales"][0] assert "datasets" in m and "path" in m["datasets"][0] assert len(m["datasets"]) == 1 + assert m["name"] == "subdir" # Input name should not be carried over - presumably it names the raw data + assert m["coordinateTransformations"] == expected_multiscale_transform assert m["datasets"][0]["path"] == "subdir/matching_scale" - assert m["datasets"][0]["coordinateTransformations"][0]["scale"] == [1.0, 1.0, 1.0, 1.0, 1.0] + assert m["datasets"][0]["coordinateTransformations"] == expected_matching_scale_transform From d4bf91ddd33031304dcdcb85252e0b5fb58a03e9 Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Wed, 25 Sep 2024 11:42:34 +0200 Subject: [PATCH 21/35] Skip hanging test --- .../test_workflows/testEdgeTrainingWithMulticutGui.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_ilastik/test_workflows/testEdgeTrainingWithMulticutGui.py b/tests/test_ilastik/test_workflows/testEdgeTrainingWithMulticutGui.py index 30256d312..2358307ae 100644 --- a/tests/test_ilastik/test_workflows/testEdgeTrainingWithMulticutGui.py +++ b/tests/test_ilastik/test_workflows/testEdgeTrainingWithMulticutGui.py @@ -272,7 +272,7 @@ def impl(): self.exec_in_shell(impl) @pytest.mark.skipif( - platform.system() == "Windows" and os.environ.get("APPVEYOR"), reason="Test hangs on Appveyor ci" + platform.system() == "Windows", reason="Test hangs, some issue with threading in self.waitForViews" ) @pytest.mark.skipif( platform.system() == "Darwin", @@ -344,7 +344,7 @@ def impl(): self.exec_in_shell(impl) @pytest.mark.skipif( - platform.system() == "Windows" and os.environ.get("APPVEYOR"), reason="Test hangs on Appveyor ci" + platform.system() == "Windows", reason="Test hangs, some issue with threading in self.waitForViews" ) def test_05_train_rf_from_gt(self): """ @@ -414,7 +414,7 @@ def handle_msgbox(): self.exec_in_shell(impl) @pytest.mark.skipif( - platform.system() == "Windows" and os.environ.get("APPVEYOR"), reason="Test hangs on Appveyor ci" + platform.system() == "Windows", reason="Test hangs, some issue with threading in self.waitForViews" ) def test_06_multicut_rf(self): """ From bf304020af2f2d349dba18dfebd5e604ea70d920 Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Thu, 26 Sep 2024 11:00:37 +0200 Subject: [PATCH 22/35] OME-Zarr export: Creator meta on root, not multiscales --- lazyflow/utility/io_util/write_ome_zarr.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lazyflow/utility/io_util/write_ome_zarr.py b/lazyflow/utility/io_util/write_ome_zarr.py index 
5ddb4f96a..8381e54b6 100644 --- a/lazyflow/utility/io_util/write_ome_zarr.py +++ b/lazyflow/utility/io_util/write_ome_zarr.py @@ -351,7 +351,7 @@ def _write_ome_zarr_and_ilastik_metadata( axes = [{"name": a, "type": axis_types[a]} for a in export_axiskeys] datasets = _get_datasets_meta(export_meta, input_ome_meta, scalings_relative_to_raw_input) - ome_zarr_multiscale_meta = {"_creator": ilastik_signature, "version": "0.4", "axes": axes, "datasets": datasets} + ome_zarr_multiscale_meta = {"axes": axes, "datasets": datasets, "version": "0.4"} # Optional fields if multiscale_name: @@ -374,6 +374,7 @@ def _write_ome_zarr_and_ilastik_metadata( store = FSStore(external_path, mode="w", **OME_ZARR_V_0_4_KWARGS) root = zarr.group(store, overwrite=False) + root.attrs["_creator"] = ilastik_signature root.attrs["multiscales"] = [ome_zarr_multiscale_meta] for image in export_meta.values(): za = zarr.Array(store, path=image.path) From 18496e6fcc489a4b6a73d9c8c28569aaa8c32d6a Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Thu, 26 Sep 2024 13:36:30 +0200 Subject: [PATCH 23/35] OME-Zarr export: Carry over axis units from input --- lazyflow/utility/io_util/OMEZarrStore.py | 9 ++++++-- lazyflow/utility/io_util/write_ome_zarr.py | 22 ++++++++++++++----- .../test_io_util/test_write_ome_zarr.py | 14 +++++++++++- 3 files changed, 36 insertions(+), 9 deletions(-) diff --git a/lazyflow/utility/io_util/OMEZarrStore.py b/lazyflow/utility/io_util/OMEZarrStore.py index 9a2630122..c59f970d6 100644 --- a/lazyflow/utility/io_util/OMEZarrStore.py +++ b/lazyflow/utility/io_util/OMEZarrStore.py @@ -69,15 +69,20 @@ class OMEZarrMultiscaleMeta: It is used for porting metadata from an OME-Zarr input to export. """ - axiskeys: List[Literal["t", "c", "z", "y", "x"]] + axis_units: OrderedDict[Literal["t", "c", "z", "y", "x"], Optional[str]] # { axis_key: axis_unit } multiscale_name: Optional[str] multiscale_transformations: Optional[List[OMEZarrCoordinateTransformation]] dataset_transformations: OrderedDict[str, List[OMEZarrCoordinateTransformation]] # { scale_key: transformations } @classmethod def from_multiscale_spec(cls, multiscale_spec) -> "OMEZarrMultiscaleMeta": + if "axes" in multiscale_spec and "name" in multiscale_spec["axes"][0]: + # In v0.4 OME-Zarr attrs, we might also receive units for each axis + axis_units = OrderedDict([(a["name"], a.get("unit")) for a in multiscale_spec["axes"]]) + else: + axis_units = OrderedDict([(tag.key, None) for tag in get_axistags_from_spec(multiscale_spec)]) return cls( - axiskeys=[tag.key for tag in get_axistags_from_spec(multiscale_spec)], + axis_units=axis_units, multiscale_name=multiscale_spec.get("name"), multiscale_transformations=_remove_transforms_with_path(multiscale_spec.get("coordinateTransformations")), dataset_transformations=OrderedDict( diff --git a/lazyflow/utility/io_util/write_ome_zarr.py b/lazyflow/utility/io_util/write_ome_zarr.py index 8381e54b6..69a74e1a2 100644 --- a/lazyflow/utility/io_util/write_ome_zarr.py +++ b/lazyflow/utility/io_util/write_ome_zarr.py @@ -238,7 +238,7 @@ def _get_input_raw_absolute_scaling(input_ome_meta: Optional[OMEZarrMultiscaleMe input_scale = raw_transforms[0] if input_scale is None or "scale" not in input_scale: return None - return ODict(zip(input_ome_meta.axiskeys, input_scale["scale"])) + return ODict(zip(input_ome_meta.axis_units.keys(), input_scale["scale"])) def _get_input_dataset_transformations( @@ -264,7 +264,7 @@ def _update_export_scaling_from_input( ) -> OrderedScaling: if not 
input_scale or "scale" not in input_scale: return absolute_scaling - input_scaling = ODict(zip(input_ome_meta.axiskeys, input_scale["scale"])) + input_scaling = ODict(zip(input_ome_meta.axis_units.keys(), input_scale["scale"])) if any([input_scaling[a] != absolute_scaling[a] for a in SPATIAL_AXES if a in input_scaling]): # This shouldn't happen logger.warning( @@ -298,6 +298,17 @@ def _write_to_dataset_attrs(ilastik_meta: Dict, za: zarr.Array): za.attrs["drange"] = ilastik_meta["drange"] +def _get_axes_meta(export_axiskeys, input_ome_meta): + axis_types = {"t": "time", "c": "channel", "z": "space", "y": "space", "x": "space"} + axes = [{"name": a, "type": axis_types[a]} for a in export_axiskeys] + if input_ome_meta: + # Add unit metadata if available + for a in axes: + if a["name"] in input_ome_meta.axis_units and input_ome_meta.axis_units[a["name"]]: + a["unit"] = input_ome_meta.axis_units[a["name"]] + return axes + + def _get_datasets_meta( multiscale_metadata: OrderedDict[str, ImageMetadata], input_ome_meta: Optional[OMEZarrMultiscaleMeta], @@ -326,7 +337,7 @@ def _get_datasets_meta( "coordinateTransformations": [{"type": "scale", "scale": list(absolute_scaling.values())}], } if input_translation and "translation" in input_translation: - tagged_translation = ODict(zip(input_ome_meta.axiskeys, input_translation["translation"])) + tagged_translation = ODict(zip(input_ome_meta.axis_units.keys(), input_translation["translation"])) reordered_translation = [ tagged_translation[a] if a in tagged_translation and a != "c" else 0.0 for a in image.scaling.keys() ] @@ -346,10 +357,9 @@ def _write_ome_zarr_and_ilastik_metadata( external_path = pc.externalPath multiscale_name = pc.internalPath.lstrip("/") if pc.internalPath else None ilastik_signature = {"name": "ilastik", "version": ilastik_version, "ome_zarr_exporter_version": 1} - axis_types = {"t": "time", "c": "channel", "z": "space", "y": "space", "x": "space"} export_axiskeys = [tag.key for tag in ilastik_meta["axistags"]] - axes = [{"name": a, "type": axis_types[a]} for a in export_axiskeys] + axes = _get_axes_meta(export_axiskeys, input_ome_meta) datasets = _get_datasets_meta(export_meta, input_ome_meta, scalings_relative_to_raw_input) ome_zarr_multiscale_meta = {"axes": axes, "datasets": datasets, "version": "0.4"} @@ -360,7 +370,7 @@ def _write_ome_zarr_and_ilastik_metadata( transforms_axis_matched = [] for transform in input_ome_meta.multiscale_transformations: transform_type = transform["type"] - tagged_transform = ODict(zip(input_ome_meta.axiskeys, transform[transform_type])) + tagged_transform = ODict(zip(input_ome_meta.axis_units.keys(), transform[transform_type])) default_value = 0.0 if transform_type == "translation" else 1.0 transforms_axis_matched.append( { diff --git a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py index 56fdcad5e..01c09cb80 100644 --- a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py +++ b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py @@ -288,7 +288,12 @@ def test_port_ome_zarr_metadata_from_input(tmp_path, tiny_5d_vigra_array_piper): source_op.Output.meta.ome_zarr_meta = OMEZarrMultiscaleMeta.from_multiscale_spec( { "name": "wonderful_pyramid", - "axes": ["t", "z", "y", "x"], # Input metadata tzyx, but e.g. 
Probabilities output would be tczyx + "axes": [ + {"name": "t", "type": "time", "unit": "second"}, + {"name": "z", "type": "space", "unit": "micrometer"}, + {"name": "y", "type": "space", "unit": "micrometer"}, + {"name": "x", "type": "space", "unit": "micrometer"}, + ], # Input metadata tzyx, but e.g. Probabilities output would be tczyx "coordinateTransformations": [{"type": "scale", "scale": [0.1, 1.0, 1.0, 1.0]}], "datasets": [ { @@ -324,6 +329,13 @@ def test_port_ome_zarr_metadata_from_input(tmp_path, tiny_5d_vigra_array_piper): assert "datasets" in m and "path" in m["datasets"][0] assert len(m["datasets"]) == 1 assert m["name"] == "subdir" # Input name should not be carried over - presumably it names the raw data + assert m["axes"] == [ + {"name": "t", "type": "time", "unit": "second"}, + {"name": "c", "type": "channel"}, + {"name": "z", "type": "space", "unit": "micrometer"}, + {"name": "y", "type": "space", "unit": "micrometer"}, + {"name": "x", "type": "space", "unit": "micrometer"}, + ] # Axis units should be carried over assert m["coordinateTransformations"] == expected_multiscale_transform assert m["datasets"][0]["path"] == "subdir/matching_scale" assert m["datasets"][0]["coordinateTransformations"] == expected_matching_scale_transform From 44121c1372a4f6adb664e18caa18cf4d00c92738 Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Fri, 27 Sep 2024 00:13:32 +0200 Subject: [PATCH 24/35] OME-Zarr metadata roundtrip test * H5N5 reader needs to present Zarr with multiscale meta for this to work * Fix rounding errors in scale factor calculation --- .../ioOperators/opStreamingH5N5Reader.py | 23 ++- lazyflow/utility/io_util/write_ome_zarr.py | 30 ++-- .../test_ioOperators/testOpExportSlot.py | 166 ++++++++++++++++++ 3 files changed, 204 insertions(+), 15 deletions(-) diff --git a/lazyflow/operators/ioOperators/opStreamingH5N5Reader.py b/lazyflow/operators/ioOperators/opStreamingH5N5Reader.py index d4bea51dd..8efe2d19d 100644 --- a/lazyflow/operators/ioOperators/opStreamingH5N5Reader.py +++ b/lazyflow/operators/ioOperators/opStreamingH5N5Reader.py @@ -23,6 +23,7 @@ import contextlib import logging import time +from collections import OrderedDict from typing import Union import vigra @@ -33,7 +34,12 @@ from lazyflow.graph import Operator, InputSlot, OutputSlot from lazyflow.utility import Timer from lazyflow.utility.helpers import get_default_axisordering, bigintprod -from lazyflow.utility.io_util.OMEZarrStore import get_axistags_from_spec as get_ome_zarr_axistags, OMEZarrMultiscaleMeta +from lazyflow.utility.io_util.OMEZarrStore import ( + get_axistags_from_spec as get_ome_zarr_axistags, + OMEZarrMultiscaleMeta, + scale_key_from_path, +) +from lazyflow.utility.io_util.multiscaleStore import Multiscales logger = logging.getLogger(__name__) @@ -153,10 +159,17 @@ def setupOutputs(self): if isinstance(self._h5N5File, z5py.ZarrFile): # Add OME-Zarr metadata to slot so that it can be ported over to an export - multiscales_spec = self._h5N5File.attrs["multiscales"] - self.OutputImage.meta.ome_zarr_meta = OMEZarrMultiscaleMeta.from_multiscale_spec( - multiscales_spec[_multiscale_index_for_path(multiscales_spec, internalPath)] - ) + multiscales_meta = self._h5N5File.attrs["multiscales"] + multiscale_spec = multiscales_meta[_multiscale_index_for_path(multiscales_meta, internalPath)] + scale_keys = [scale_key_from_path(dataset["path"]) for dataset in multiscale_spec["datasets"]] + scale_tagged_shapes = [ + OrderedDict(zip(axistags.keys(), 
self._h5N5File[dataset["path"]].shape)) + for dataset in multiscale_spec["datasets"] + ] + scales: Multiscales = OrderedDict(zip(scale_keys, scale_tagged_shapes)) + self.OutputImage.meta.scales = scales + self.OutputImage.meta.lowest_scale = scale_keys[-1] + self.OutputImage.meta.ome_zarr_meta = OMEZarrMultiscaleMeta.from_multiscale_spec(multiscale_spec) def execute(self, slot, subindex, roi, result): t = time.time() diff --git a/lazyflow/utility/io_util/write_ome_zarr.py b/lazyflow/utility/io_util/write_ome_zarr.py index 69a74e1a2..45c4fb604 100644 --- a/lazyflow/utility/io_util/write_ome_zarr.py +++ b/lazyflow/utility/io_util/write_ome_zarr.py @@ -136,20 +136,28 @@ def _get_input_multiscales_matching_export( def _scale_shapes_to_factors( multiscales: multiscaleStore.Multiscales, base_shape: TaggedShape, - axiskeys_to_match: List[Literal["t", "c", "z", "y", "x"]], + output_axiskeys: List[Literal["t", "c", "z", "y", "x"]], ) -> List[OrderedScaling]: - """Input multiscales may have arbitrary axes. - Output are scaling factors relative to base_shape, with axes axiskeys_to_match. - Scale factor 1.0 for axes not present in scale_shape, and for channel.""" + """Multiscales and base_shape may have arbitrary axes. + Output are scaling factors relative to base_shape, with axes output_axiskeys. + Scale factor 1.0 for axes not present in scale or base shape, and for channel.""" scalings = [] for scale_shape in multiscales.values(): - filtered_base_values = [s for a, s in base_shape.items() if a in scale_shape] - # base_shape / scale_shape: See note on scaling "factors" in _compute_new_scaling_factors - relative_factors = { - a: base / s for a, s, base in zip(scale_shape.keys(), scale_shape.values(), filtered_base_values) + common_axes = [a for a in scale_shape.keys() if a in base_shape.keys()] + scale_values = [scale_shape[a] for a in common_axes] + base_values = [base_shape[a] for a in common_axes] + # This scale's scaling relative to base_shape; cf note on scaling "factors" in _compute_new_scaling_factors + relative_factors = {a: base / s for a, s, base in zip(common_axes, scale_values, base_values)} + # Account for scale_shape maybe being the result of rounding while downscaling base_shape + rounded = {a: float(round(f)) for a, f in relative_factors.items()} + rounding_errors = {a: (base / rounded[a]) - s for a, s, base in zip(common_axes, scale_values, base_values)} + # Use rounded factors for axes where scale shape was result of rounding (rounding error less than 1px) + rounded_or_relative = { + a: rounded[a] if abs(error) < 1.0 else relative_factors[a] for a, error in rounding_errors.items() } + # Pad with 1.0 for requested axes not present in scale/base, and c axes_matched_factors = ODict( - [(a, relative_factors[a] if a in relative_factors and a != "c" else 1.0) for a in axiskeys_to_match] + [(a, rounded_or_relative[a] if a in rounded_or_relative and a != "c" else 1.0) for a in output_axiskeys] ) scalings.append(axes_matched_factors) return scalings @@ -283,8 +291,10 @@ def _update_export_scaling_from_input( def _make_absolute_if_possible(relative_scaling: OrderedScaling, raw_data_abs_scale: Optional[OrderedScaling]): if not raw_data_abs_scale: return relative_scaling + # Round to avoid floating point errors leading to numbers like 1.4000000000000001 + # Presumably nobody needs scaling factors to more than 13 decimal places items_per_axis = [ - (a, s * raw_data_abs_scale[a]) if a in raw_data_abs_scale and a != "c" else (a, s) + (a, round(s * raw_data_abs_scale[a], 13)) if a in 
raw_data_abs_scale and a != "c" else (a, s) for a, s in relative_scaling.items() ] return ODict(items_per_axis) diff --git a/tests/test_lazyflow/test_operators/test_ioOperators/testOpExportSlot.py b/tests/test_lazyflow/test_operators/test_ioOperators/testOpExportSlot.py index 8e33d8385..28d502ff4 100644 --- a/tests/test_lazyflow/test_operators/test_ioOperators/testOpExportSlot.py +++ b/tests/test_lazyflow/test_operators/test_ioOperators/testOpExportSlot.py @@ -25,11 +25,14 @@ import tempfile import shutil import platform +from pathlib import Path import numpy import vigra +import z5py from lazyflow.graph import Graph +from lazyflow.operator import Operator from lazyflow.utility import PathComponents from lazyflow.roi import roiFromShape from lazyflow.operators.operators import OpArrayPiper @@ -107,6 +110,169 @@ def testBasic_ome_zarr(self): finally: opRead.cleanUp() + def test_ome_zarr_roundtrip(self): + """Ensure that loading an OME-Zarr dataset and then re-exporting one of + its scales produces the same data and metadata.""" + input_meta = [ + { + "name": "input.zarr", + "type": "sample", + "version": "0.4", + "axes": [ + {"type": "space", "name": "y", "unit": "nanometer"}, + {"type": "space", "name": "x", "unit": "nanometer"}, + ], + "datasets": [ + { + "path": "s0", + "coordinateTransformations": [ + {"scale": [0.2, 0.2], "type": "scale"}, + {"translation": [0.0, 0.0], "type": "translation"}, + ], + }, + { + "path": "s1", + "coordinateTransformations": [ + {"scale": [1.4, 1.4], "type": "scale"}, + {"translation": [7.62, 8.49], "type": "translation"}, + ], + }, + ], + "coordinateTransformations": [ + {"scale": [1.0, 1.0], "type": "scale"}, + {"translation": [0.0, 0.0], "type": "translation"}, + ], + } + ] + # Expected written meta is the same as input, but tczyx, only with the respective scale, + # and with "exported_data" as the name (internal path is mandatory due to + # OpExportData.OutputInternalPath having default="exported_data") + expected_meta_s0 = [ + { + "axes": [ + {"name": "t", "type": "time"}, + {"name": "c", "type": "channel"}, + {"name": "z", "type": "space"}, + {"name": "y", "type": "space", "unit": "nanometer"}, + {"name": "x", "type": "space", "unit": "nanometer"}, + ], + "coordinateTransformations": [ + {"scale": [1.0, 1.0, 1.0, 1.0, 1.0], "type": "scale"}, + {"translation": [0.0, 0.0, 0.0, 0.0, 0.0], "type": "translation"}, + ], + "datasets": [ + { + "coordinateTransformations": [ + {"scale": [1.0, 1.0, 1.0, 0.2, 0.2], "type": "scale"}, + {"translation": [0.0, 0.0, 0.0, 0.0, 0.0], "type": "translation"}, + ], + "path": "exported_data/s0", + } + ], + "name": "exported_data", + "version": "0.4", + } + ] + expected_meta_s1 = [ + { + "axes": [ + {"name": "t", "type": "time"}, + {"name": "c", "type": "channel"}, + {"name": "z", "type": "space"}, + {"name": "y", "type": "space", "unit": "nanometer"}, + {"name": "x", "type": "space", "unit": "nanometer"}, + ], + "coordinateTransformations": [ + {"scale": [1.0, 1.0, 1.0, 1.0, 1.0], "type": "scale"}, + {"translation": [0.0, 0.0, 0.0, 0.0, 0.0], "type": "translation"}, + ], + "datasets": [ + { + "coordinateTransformations": [ + {"scale": [1.0, 1.0, 1.0, 1.4, 1.4], "type": "scale"}, + {"translation": [0.0, 0.0, 0.0, 7.62, 8.49], "type": "translation"}, + ], + "path": "exported_data/s1", + } + ], + "name": "exported_data", + "version": "0.4", + } + ] + + path_in = self._tmpdir + "/input.zarr" + file = z5py.ZarrFile(path_in, "w") + data = numpy.random.random((89, 99)).astype(numpy.float32) + downscale = data[::7, ::7] + 
file.create_dataset("s0", data=data) + file.create_dataset("s1", data=downscale) + file.attrs["multiscales"] = input_meta + + graph = Graph() + # Raw scale first + opRead = OpInputDataReader(graph=graph) + opExport = OpExportSlot(graph=graph) + try: + opRead.FilePath.setValue(path_in + "/s0") + + export_path = self._tmpdir + "/test_export.zarr" + opExport.Input.connect(opRead.Output) + opExport.OutputFormat.setValue("single-scale OME-Zarr") + opExport.OutputFilenameFormat.setValue(export_path) + opExport.run_export() + + assert os.path.exists(export_path) + written_file = z5py.ZarrFile(export_path, "r") + assert written_file.attrs["multiscales"] == expected_meta_s0 + finally: + opExport.cleanUp() + del opExport + opRead.cleanUp() + del opRead + + # Same thing for the second scale + # Have to make new ops because they aren't "recyclable" after a cleanUp + opRead = OpInputDataReader(graph=graph) + opExport = OpExportSlot(graph=graph) + try: + opRead.FilePath.setValue(path_in + "/s1") + + export_path = self._tmpdir + "/test_export.zarr" + opExport.Input.connect(opRead.Output) + opExport.OutputFormat.setValue("single-scale OME-Zarr") + opExport.OutputFilenameFormat.setValue(export_path) + opExport.run_export() + + assert os.path.exists(export_path) + written_file = z5py.ZarrFile(export_path, "r") + assert written_file.attrs["multiscales"] == expected_meta_s1 + finally: + opExport.cleanUp() + opRead.cleanUp() + + # Another time, but give path as URI to go through OMEZarrMultiscaleReader + # opRead then needs a parent to avoid the multiscale reader going into single-scale mode + noop = Operator(graph=graph) + opRead = OpInputDataReader(parent=noop) + opExport = OpExportSlot(parent=noop) + try: + opRead.FilePath.setValue(Path(path_in).as_uri()) + opRead.ActiveScale.setValue("s1") + + export_path = self._tmpdir + "/test_export.zarr" + opExport.Input.connect(opRead.Output) + opExport.OutputFormat.setValue("single-scale OME-Zarr") + opExport.OutputFilenameFormat.setValue(export_path) + opExport.run_export() + + assert os.path.exists(export_path) + written_file = z5py.ZarrFile(export_path, "r") + assert written_file.attrs["multiscales"] == expected_meta_s1 + finally: + opExport.cleanUp() + opRead.cleanUp() + noop.cleanUp() + def testBasic_Npy(self): data = numpy.random.random((100, 100)).astype(numpy.float32) data = vigra.taggedView(data, vigra.defaultAxistags("xy")) From 76b537549e755f52aad3a23c595eebc07d53a3df Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Fri, 27 Sep 2024 13:26:03 +0200 Subject: [PATCH 25/35] OME-Zarr export: Refuse export to existing store --- lazyflow/utility/io_util/write_ome_zarr.py | 10 ++- .../test_io_util/test_write_ome_zarr.py | 74 ++++++++++--------- 2 files changed, 47 insertions(+), 37 deletions(-) diff --git a/lazyflow/utility/io_util/write_ome_zarr.py b/lazyflow/utility/io_util/write_ome_zarr.py index 45c4fb604..178b60bcd 100644 --- a/lazyflow/utility/io_util/write_ome_zarr.py +++ b/lazyflow/utility/io_util/write_ome_zarr.py @@ -2,6 +2,7 @@ import logging from collections import OrderedDict as ODict from functools import partial +from pathlib import Path from typing import List, Tuple, Dict, OrderedDict, Optional, Literal import numpy @@ -204,9 +205,6 @@ def _create_empty_zarrays( for scale_key, scaling in output_scalings.items(): scale_path = f"{internal_path}/{scale_key}" if internal_path else scale_key scaled_shape = _scale_tagged_shape(export_shape, scaling).values() - if contains_array(store, scale_path): - 
logger.warning(f"Deleting existing dataset at {external_path}/{scale_path}.") - del store[scale_path] zarrays[scale_key] = zarr.creation.zeros( scaled_shape, store=store, path=scale_path, chunks=chunk_shape, dtype=export_dtype ) @@ -407,6 +405,12 @@ def write_ome_zarr( progress_signal: OrderedSignal, compute_downscales: bool = False, ): + if Path(PathComponents(export_path).externalPath).exists(): + raise FileExistsError( + "Aborting because export path already exists. Please delete it manually if you intended to overwrite it. " + "Appending to an existing OME-Zarr store is not yet implemented." + f"\nPath: {PathComponents(export_path).externalPath}." + ) op_reorder = OpReorderAxes(parent=image_source_slot.operator) op_reorder.AxisOrder.setValue("tczyx") try: diff --git a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py index 01c09cb80..3f37fcc83 100644 --- a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py +++ b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py @@ -36,9 +36,9 @@ def test_metadata_integrity(tmp_path, graph, shape, axes): expected_axiskeys = "tczyx" assert export_path.exists() - store = zarr.open(str(export_path)) - assert "multiscales" in store.attrs - written_meta = store.attrs["multiscales"][0] + group = zarr.open(str(export_path)) + assert "multiscales" in group.attrs + written_meta = group.attrs["multiscales"][0] assert all([key in written_meta for key in ("datasets", "axes", "version")]) # Keys required by spec assert all([value is not None for value in written_meta.values()]) # Should not write None anywhere assert written_meta["version"] == "0.4" @@ -48,9 +48,9 @@ def test_metadata_integrity(tmp_path, graph, shape, axes): discovered_keys = [] for dataset in written_meta["datasets"]: - assert dataset["path"] in store + assert dataset["path"] in group discovered_keys.append(dataset["path"]) - written_array = store[dataset["path"]] + written_array = group[dataset["path"]] assert written_array.fill_value is not None, "FIJI and z5py don't open zarrays without a fill_value" assert "axistags" in written_array.attrs, f"no axistags for {dataset['path']}" assert vigra.AxisTags.fromJSON(written_array.attrs["axistags"]) == vigra.defaultAxistags(expected_axiskeys) @@ -65,7 +65,7 @@ def test_metadata_integrity(tmp_path, graph, shape, axes): ) assert written_array.shape == expected_shape assert numpy.count_nonzero(written_array) > numpy.prod(expected_shape) / 2, "did not write actual data" - assert all([key in discovered_keys for key in store.keys()]), "store contains undocumented subpaths" + assert all([key in discovered_keys for key in group.keys()]), "store contains undocumented subpaths" @pytest.mark.parametrize( @@ -89,11 +89,11 @@ def test_writes_with_no_scaling(tmp_path, graph, data_shape, scaling_on): write_ome_zarr(str(export_path), source_op.Output, progress, compute_downscales=scaling_on) - store = zarr.open(str(export_path)) - meta = store.attrs["multiscales"][0] + group = zarr.open(str(export_path)) + meta = group.attrs["multiscales"][0] assert len(meta["datasets"]) == 1 dataset = meta["datasets"][0] - numpy.testing.assert_array_equal(store[dataset["path"]], data) + numpy.testing.assert_array_equal(group[dataset["path"]], data) scale_transforms = [transform for transform in dataset["coordinateTransformations"] if transform["type"] == "scale"] assert scale_transforms[0]["scale"] == [1.0, 1.0, 1.0, 1.0, 1.0] @@ -127,10 +127,10 @@ def 
test_downscaling(tmp_path, graph, data_shape, computation_block_shape, expec write_ome_zarr(str(export_path), source_op.Output, progress, compute_downscales=True) - store = zarr.open(str(export_path)) - meta = store.attrs["multiscales"][0] + group = zarr.open(str(export_path)) + meta = group.attrs["multiscales"][0] assert len(meta["datasets"]) == len(expected_scalings) - numpy.testing.assert_array_equal(store[meta["datasets"][0]["path"]], data) + numpy.testing.assert_array_equal(group[meta["datasets"][0]["path"]], data) for i, scaling in enumerate(expected_scalings): dataset = meta["datasets"][i] @@ -141,13 +141,13 @@ def test_downscaling(tmp_path, graph, data_shape, computation_block_shape, expec # Makes sure that the blockwise-scaled image is identical to downscaling the data at once if scaling == [1.0, 1.0, 4.0, 4.0, 4.0]: downscaled_data = data[:, :, ::4, ::4, ::4] - numpy.testing.assert_array_equal(store[dataset["path"]], downscaled_data) + numpy.testing.assert_array_equal(group[dataset["path"]], downscaled_data) elif scaling == [1.0, 1.0, 2.0, 2.0, 2.0]: downscaled_data = data[:, :, ::2, ::2, ::2] - numpy.testing.assert_array_equal(store[dataset["path"]], downscaled_data) + numpy.testing.assert_array_equal(group[dataset["path"]], downscaled_data) elif scaling == [1.0, 1.0, 1.0, 2.0, 2.0]: downscaled_data = data[:, :, :, ::2, ::2] - numpy.testing.assert_array_equal(store[dataset["path"]], downscaled_data) + numpy.testing.assert_array_equal(group[dataset["path"]], downscaled_data) @pytest.mark.skip("To be implemented after releasing single-scale export") @@ -204,32 +204,38 @@ def test_write_new_ome_zarr_with_name_on_disc(tmp_path, tiny_5d_vigra_array_pipe write_ome_zarr(str(export_path), tiny_5d_vigra_array_piper.Output, progress, compute_downscales=True) assert export_path.exists() - store = zarr.open(str(tmp_path / "test.zarr")) - assert "multiscales" in store.attrs - m = store.attrs["multiscales"][0] + group = zarr.open(str(tmp_path / "test.zarr")) + assert "multiscales" in group.attrs + m = group.attrs["multiscales"][0] assert all(key in m for key in ("datasets", "axes", "version", "name")) assert m["version"] == "0.4" assert m["name"] == "predictions/first_attempt" assert [a["name"] for a in m["axes"]] == ["t", "c", "z", "y", "x"] - assert all(dataset["path"] in store for dataset in m["datasets"]) - assert all(dataset["path"][0] != "/" in store for dataset in m["datasets"]) + assert all(dataset["path"] in group for dataset in m["datasets"]) + assert all(dataset["path"][0] != "/" in group for dataset in m["datasets"]) -def test_overwrite_existing_store(tmp_path, tiny_5d_vigra_array_piper): +def test_do_not_overwrite(tmp_path, tiny_5d_vigra_array_piper): + original_data_array = tiny_5d_vigra_array_piper.Output.value data_array2 = vigra.VigraArray((1, 1, 3, 3, 3), axistags=vigra.defaultAxistags("tczyx")) data_array2[...] 
= numpy.indices((1, 1, 3, 3, 3)).sum(0) export_path = tmp_path / "test.zarr" source_op = tiny_5d_vigra_array_piper progress = mock.Mock() write_ome_zarr(str(export_path), source_op.Output, progress, compute_downscales=True) + + with pytest.raises(FileExistsError): + write_ome_zarr(str(export_path / "copy"), source_op.Output, progress, compute_downscales=True) + group = zarr.open(str(export_path)) + assert "copy" not in group, "should not append to existing store" + m = group.attrs["multiscales"][0] + assert m["datasets"][0]["path"] == "s0" + source_op.Input.setValue(data_array2) - write_ome_zarr(str(export_path), source_op.Output, progress, compute_downscales=True) - store = zarr.open(str(tmp_path / "test.zarr")) - assert "multiscales" in store.attrs - m = store.attrs["multiscales"][0] - assert "datasets" in m and "path" in m["datasets"][0] - written_data = store[m["datasets"][0]["path"]] - numpy.testing.assert_array_equal(written_data, data_array2) + with pytest.raises(FileExistsError): + write_ome_zarr(str(export_path), source_op.Output, progress, compute_downscales=True) + # should not overwrite existing array + numpy.testing.assert_array_equal(group["s0"], original_data_array) def test_match_input_scale_key_and_factors(tmp_path, tiny_5d_vigra_array_piper): @@ -254,9 +260,9 @@ def test_match_input_scale_key_and_factors(tmp_path, tiny_5d_vigra_array_piper): write_ome_zarr(str(export_path), source_op.Output, progress, compute_downscales=False) - store = zarr.open(str(store_path)) - assert "multiscales" in store.attrs - m = store.attrs["multiscales"][0] + group = zarr.open(str(store_path)) + assert "multiscales" in group.attrs + m = group.attrs["multiscales"][0] assert "datasets" in m and "path" in m["datasets"][0] assert len(m["datasets"]) == 1 assert m["datasets"][0]["path"] == "subdir/matching_scale" @@ -323,9 +329,9 @@ def test_port_ome_zarr_metadata_from_input(tmp_path, tiny_5d_vigra_array_piper): write_ome_zarr(str(export_path), source_op.Output, progress, compute_downscales=False) - store = zarr.open(str(store_path)) - assert "multiscales" in store.attrs - m = store.attrs["multiscales"][0] + group = zarr.open(str(store_path)) + assert "multiscales" in group.attrs + m = group.attrs["multiscales"][0] assert "datasets" in m and "path" in m["datasets"][0] assert len(m["datasets"]) == 1 assert m["name"] == "subdir" # Input name should not be carried over - presumably it names the raw data From cd0bae930dff28e5c50e4b8b2b95087175983dea Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Fri, 27 Sep 2024 13:54:28 +0200 Subject: [PATCH 26/35] OME-Zarr export: Enable export without internal path --- .../operators/ioOperators/opExportSlot.py | 5 +++-- .../test_ioOperators/testOpExportSlot.py | 20 +++++++++---------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/lazyflow/operators/ioOperators/opExportSlot.py b/lazyflow/operators/ioOperators/opExportSlot.py index 4edb38deb..43c2aa4bd 100644 --- a/lazyflow/operators/ioOperators/opExportSlot.py +++ b/lazyflow/operators/ioOperators/opExportSlot.py @@ -150,14 +150,15 @@ def _executeExportPath(self, result): path_format += "." 
+ file_extension # Provide the TOTAL path (including dataset name) - if self.OutputFormat.value in ( + hierarchical_formats = ( "hdf5", "compressed hdf5", "n5", "compressed n5", "single-scale OME-Zarr", "multi-scale OME-Zarr", - ): + ) + if self.OutputFormat.value in hierarchical_formats and self.OutputInternalPath.value != "": path_format += "/" + self.OutputInternalPath.value roi = numpy.array(roiFromShape(self.Input.meta.shape)) diff --git a/tests/test_lazyflow/test_operators/test_ioOperators/testOpExportSlot.py b/tests/test_lazyflow/test_operators/test_ioOperators/testOpExportSlot.py index 28d502ff4..7f24db25f 100644 --- a/tests/test_lazyflow/test_operators/test_ioOperators/testOpExportSlot.py +++ b/tests/test_lazyflow/test_operators/test_ioOperators/testOpExportSlot.py @@ -81,7 +81,7 @@ def testBasic_Hdf5(self): finally: opRead.cleanUp() - def testBasic_ome_zarr(self): + def test_ome_zarr_without_internal_path(self): data = numpy.random.random((90, 100)).astype(numpy.float32) data = vigra.taggedView(data, vigra.defaultAxistags("yx")) @@ -93,17 +93,19 @@ def testBasic_ome_zarr(self): opExport.Input.connect(opPiper.Output) opExport.OutputFormat.setValue("single-scale OME-Zarr") opExport.OutputFilenameFormat.setValue(self._tmpdir + "/test_export_x{x_start}-{x_stop}_y{y_start}-{y_stop}") - opExport.OutputInternalPath.setValue("volume/data") + opExport.OutputInternalPath.setValue("") # Overwrite the slot's default "exported_data" opExport.CoordinateOffset.setValue((10, 20)) assert opExport.ExportPath.ready() - export_file = PathComponents(opExport.ExportPath.value).externalPath - assert os.path.split(export_file)[1] == "test_export_x20-120_y10-100.zarr" + export_path_components = PathComponents(opExport.ExportPath.value) + expected_export_path = Path(self._tmpdir) / "test_export_x20-120_y10-100.zarr" + assert Path(export_path_components.externalPath) == expected_export_path + assert export_path_components.internalPath is None opExport.run_export() opRead = OpInputDataReader(graph=graph) try: - opRead.FilePath.setValue(opExport.ExportPath.value + "/s0") + opRead.FilePath.setValue(str(expected_export_path / "s0")) expected_data = data.view(numpy.ndarray).reshape((1, 1, 1) + data.shape) # OME-Zarr always tczyx read_data = opRead.Output[:].wait() numpy.testing.assert_array_equal(read_data, expected_data) @@ -215,7 +217,7 @@ def test_ome_zarr_roundtrip(self): try: opRead.FilePath.setValue(path_in + "/s0") - export_path = self._tmpdir + "/test_export.zarr" + export_path = self._tmpdir + "/test_export1.zarr" opExport.Input.connect(opRead.Output) opExport.OutputFormat.setValue("single-scale OME-Zarr") opExport.OutputFilenameFormat.setValue(export_path) @@ -226,9 +228,7 @@ def test_ome_zarr_roundtrip(self): assert written_file.attrs["multiscales"] == expected_meta_s0 finally: opExport.cleanUp() - del opExport opRead.cleanUp() - del opRead # Same thing for the second scale # Have to make new ops because they aren't "recyclable" after a cleanUp @@ -237,7 +237,7 @@ def test_ome_zarr_roundtrip(self): try: opRead.FilePath.setValue(path_in + "/s1") - export_path = self._tmpdir + "/test_export.zarr" + export_path = self._tmpdir + "/test_export2.zarr" opExport.Input.connect(opRead.Output) opExport.OutputFormat.setValue("single-scale OME-Zarr") opExport.OutputFilenameFormat.setValue(export_path) @@ -259,7 +259,7 @@ def test_ome_zarr_roundtrip(self): opRead.FilePath.setValue(Path(path_in).as_uri()) opRead.ActiveScale.setValue("s1") - export_path = self._tmpdir + "/test_export.zarr" + export_path = 
self._tmpdir + "/test_export3.zarr" opExport.Input.connect(opRead.Output) opExport.OutputFormat.setValue("single-scale OME-Zarr") opExport.OutputFilenameFormat.setValue(export_path) From 3e2bf49c033d5ebb7feb9be694c61c6b65167216 Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Wed, 2 Oct 2024 15:39:42 +0200 Subject: [PATCH 27/35] Precomputed: Match output scale to input -- fix shape order axistags are czyx but scale["size"] is xyz :) --- lazyflow/utility/io_util/RESTfulPrecomputedChunkedVolume.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lazyflow/utility/io_util/RESTfulPrecomputedChunkedVolume.py b/lazyflow/utility/io_util/RESTfulPrecomputedChunkedVolume.py index b86fa42b7..9fa5641a7 100644 --- a/lazyflow/utility/io_util/RESTfulPrecomputedChunkedVolume.py +++ b/lazyflow/utility/io_util/RESTfulPrecomputedChunkedVolume.py @@ -101,7 +101,7 @@ def __init__(self, volume_url: str, n_threads=4): self._scales = {scale["key"]: scale for scale in self._json_info["scales"]} self.n_channels = self._json_info["num_channels"] scale_shapes = [ - OrderedDict(zip("czyx", [self.n_channels] + scale["size"])) for scale in self._json_info["scales"] + OrderedDict(zip("czyx", [self.n_channels] + scale["size"][::-1])) for scale in self._json_info["scales"] ] scale_metadata = OrderedDict(zip(self._scales.keys(), scale_shapes)) From 22520f54863df466f379832721ebacc6b0c0b27a Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Wed, 2 Oct 2024 16:02:18 +0200 Subject: [PATCH 28/35] Precomputed: Match output scale to input -- add test --- .../dataSelection/testOpDataSelection.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tests/test_ilastik/test_applets/dataSelection/testOpDataSelection.py b/tests/test_ilastik/test_applets/dataSelection/testOpDataSelection.py index c4a5dc085..e04710d2b 100644 --- a/tests/test_ilastik/test_applets/dataSelection/testOpDataSelection.py +++ b/tests/test_ilastik/test_applets/dataSelection/testOpDataSelection.py @@ -21,7 +21,7 @@ import json import os import shutil -from collections import defaultdict +from collections import defaultdict, OrderedDict from unittest import mock from unittest.mock import Mock @@ -878,6 +878,13 @@ def test_load_precomputed_chunks_over_http(self, op): loaded_scale0 = op.Image[:].wait() numpy.testing.assert_allclose(loaded_scale0, self.IMAGE_SCALED.reshape((1, 1, 1, 10, 12))) + assert op.Image.meta.scales == OrderedDict( + [ + ("800_800_70", OrderedDict([("c", 1), ("z", 1), ("y", 20), ("x", 24)])), + ("1600_1600_70", OrderedDict([("c", 1), ("z", 1), ("y", 10), ("x", 12)])), + ] + ) + # Switch to original unscaled resolution (first in the list, see multiscaleStore.multiscales) scale_keys = list(op.Image.meta.scales.keys()) op.ActiveScale.setValue(scale_keys[0]) @@ -980,6 +987,13 @@ def test_ome_zarr_loads_via_FSStore_and_ZarrArray(self, op, mock_ome_zarr_data): loaded_scale0 = op.Image[:].wait() numpy.testing.assert_allclose(loaded_scale0, self.IMAGE_SCALED.reshape((1, 1, 1, 10, 12))) + assert op.Image.meta.scales == OrderedDict( + [ + ("s0", OrderedDict([("z", 1), ("y", 20), ("x", 24)])), + ("s1", OrderedDict([("z", 1), ("y", 10), ("x", 12)])), + ] + ) + # Switch to original unscaled resolution (first in the list, see multiscaleStore.multiscales) scale_keys = list(op.Image.meta.scales.keys()) op.ActiveScale.setValue(scale_keys[0]) From cec15eed5dc88fc171033676f6bdb435f3c41c58 Mon Sep 17 00:00:00 2001 From: Benedikt Best 
<63287233+btbest@users.noreply.github.com> Date: Tue, 8 Oct 2024 14:45:45 +0200 Subject: [PATCH 29/35] OME-Zarr export review: Remove unimplemented scaling --- .../operators/ioOperators/opExportSlot.py | 12 +- lazyflow/utility/io_util/write_ome_zarr.py | 114 +++--------------- .../test_io_util/test_write_ome_zarr.py | 17 +-- 3 files changed, 25 insertions(+), 118 deletions(-) diff --git a/lazyflow/operators/ioOperators/opExportSlot.py b/lazyflow/operators/ioOperators/opExportSlot.py index 43c2aa4bd..7c00dbdd5 100644 --- a/lazyflow/operators/ioOperators/opExportSlot.py +++ b/lazyflow/operators/ioOperators/opExportSlot.py @@ -90,7 +90,6 @@ class OpExportSlot(Operator): FormatInfo("n5", "n5", 0, 5), FormatInfo("compressed n5", "n5", 0, 5), FormatInfo("single-scale OME-Zarr", "zarr", 0, 5), - FormatInfo("multi-scale OME-Zarr", "zarr", 0, 5), FormatInfo("numpy", "npy", 0, 5), FormatInfo("dvid", "", 2, 5), FormatInfo("blockwise hdf5", "json", 0, 5), @@ -108,7 +107,6 @@ def __init__(self, *args, **kwargs): export_impls["n5"] = ("n5", self._export_h5n5) export_impls["compressed n5"] = ("n5", partial(self._export_h5n5, True)) export_impls["single-scale OME-Zarr"] = ("zarr", self._export_ome_zarr) - export_impls["multi-scale OME-Zarr"] = ("zarr", partial(self._export_ome_zarr, True)) export_impls["numpy"] = ("npy", self._export_npy) export_impls["dvid"] = ("", self._export_dvid) export_impls["blockwise hdf5"] = ("json", self._export_blockwise_hdf5) @@ -156,7 +154,6 @@ def _executeExportPath(self, result): "n5", "compressed n5", "single-scale OME-Zarr", - "multi-scale OME-Zarr", ) if self.OutputFormat.value in hierarchical_formats and self.OutputInternalPath.value != "": path_format += "/" + self.OutputInternalPath.value @@ -414,12 +411,10 @@ def _export_multipage_tiff_sequence(self): opExport.cleanUp() self.progressSignal(100) - def _export_ome_zarr(self, compute_downscales: bool = False): - if compute_downscales: - raise NotImplementedError() + def _export_ome_zarr(self): self.progressSignal(0) try: - write_ome_zarr(self.ExportPath.value, self.Input, self.progressSignal, compute_downscales) + write_ome_zarr(self.ExportPath.value, self.Input, self.progressSignal) finally: self.progressSignal(100) @@ -468,7 +463,6 @@ class FormatValidity(object): "n5": ALL_DTYPES, "compressed n5": ALL_DTYPES, "single-scale OME-Zarr": ALL_DTYPES, - "multi-scale OME-Zarr": ALL_DTYPES, } # { extension : (min_ndim, max_ndim) } @@ -490,7 +484,6 @@ class FormatValidity(object): "n5": (0, 5), "compressed n5": (0, 5), "single-scale OME-Zarr": (0, 5), - "multi-scale OME-Zarr": (0, 5), } # { extension : [allowed_num_channels] } @@ -512,7 +505,6 @@ class FormatValidity(object): "n5": (), # ditto "compressed n5": (), # ditto "single-scale OME-Zarr": (), - "multi-scale OME-Zarr": (), } @classmethod diff --git a/lazyflow/utility/io_util/write_ome_zarr.py b/lazyflow/utility/io_util/write_ome_zarr.py index 178b60bcd..2b752d08d 100644 --- a/lazyflow/utility/io_util/write_ome_zarr.py +++ b/lazyflow/utility/io_util/write_ome_zarr.py @@ -51,85 +51,16 @@ def _get_chunk_shape(tagged_image_shape: TaggedShape, dtype) -> Shape: return chunk_shape -def _compute_new_scaling_factors( - original_tagged_shape: TaggedShape, chunk_shape: Shape, compute_downscales: bool -) -> List[OrderedScaling]: - """ - Computes scaling "factors". - Technically they are divisors for the shape (factor 2.0 means half the shape). 
- Downscaling is done by a factor of 2 in all spatial dimensions until: - - the dataset would be less than 4 x chunk size (2MiB) - - an axis that started non-singleton would become singleton - Returns list of scaling factor dicts by axis, starting with original scale. - The scaling level that meets one of the exit conditions is excluded. - Raises if more than 20 scales are computed (sanity). - """ - assert len(chunk_shape) == len(original_tagged_shape), "Chunk shape and tagged shape must have same length" - original_scale = ODict([(a, 1.0) for a in original_tagged_shape.keys()]) - scalings = [original_scale] - if not compute_downscales: - return scalings - sanity_limit = 20 - for i in range(sanity_limit): - if i == sanity_limit: - raise ValueError(f"Too many scales computed, limit={sanity_limit}. Please report this to the developers.") - new_scaling = ODict( - [ - (a, 2.0 ** (i + 1)) if a in SPATIAL_AXES and original_tagged_shape[a] > 1 else (a, 1.0) - for a in original_tagged_shape.keys() - ] - ) - new_shape = _scale_tagged_shape(original_tagged_shape, new_scaling) - if _is_less_than_4_chunks(new_shape, chunk_shape) or _reduces_any_axis_to_singleton( - new_shape.values(), original_tagged_shape.values() - ): - break - raise NotImplementedError("See _apply_scaling_method()") # scalings.append(new_scaling) - return scalings - - -def _reduces_any_axis_to_singleton(new_shape: Shape, original_shape: Shape): - return any(new <= 1 < orig for new, orig in zip(new_shape, original_shape)) - - -def _is_less_than_4_chunks(new_shape: TaggedShape, chunk_shape: Shape): - spatial_shape = [s for a, s in new_shape.items() if a in SPATIAL_AXES] - return numpy.prod(spatial_shape) < 4 * numpy.prod(chunk_shape) - - -def _scale_tagged_shape(original_tagged_shape: TaggedShape, scaling: OrderedScaling) -> TaggedShape: - assert all(s > 0 for s in scaling.values()), f"Invalid scaling: {scaling}" - return ODict( - [ - (a, _round_like_scaling_method(s / scaling[a]) if a in scaling else s) - for a, s in original_tagged_shape.items() - ] - ) - - -def _round_like_scaling_method(value: float) -> int: - """For calculating scaled shape after applying the scaling method. - Different scaling methods round differently, so we need to match that. - E.g. scaling by stepwise downsampling like image[::2, ::2] always rounds up, - while e.g. 
skimage.transform.rescale rounds mathematically like standard round().""" - return int(value) - - -def _get_input_multiscales_matching_export( - input_scales: multiscaleStore.Multiscales, export_shape: TaggedShape, compute_downscales: bool +def _get_input_multiscale_matching_export( + input_scales: multiscaleStore.Multiscales, export_shape: TaggedShape ) -> multiscaleStore.Multiscales: - """Filter for multiscales entry that matches source image, plus lower scales if compute_downscales is True.""" + """Filter for multiscales entry that matches source image.""" matching_scales = [] - # Multiscales is ordered from highest to lowest resolution, so start collecting once match found - match_found = False + # Multiscales is ordered from highest to lowest resolution for key, scale_shape in input_scales.items(): if all(scale_shape[a] == export_shape[a] or a == "c" for a in scale_shape.keys()): - match_found = True - matching_scales.append((key, scale_shape)) - if not compute_downscales: - break - elif match_found: matching_scales.append((key, scale_shape)) + break assert len(matching_scales) > 0, "Should be impossible, input must be one of the scales" return ODict(matching_scales) @@ -147,7 +78,8 @@ def _scale_shapes_to_factors( common_axes = [a for a in scale_shape.keys() if a in base_shape.keys()] scale_values = [scale_shape[a] for a in common_axes] base_values = [base_shape[a] for a in common_axes] - # This scale's scaling relative to base_shape; cf note on scaling "factors" in _compute_new_scaling_factors + # This scale's scaling relative to base_shape. + # Scaling "factors" are technically divisors for the shape (factor 2.0 means half the shape). relative_factors = {a: base / s for a, s, base in zip(common_axes, scale_values, base_values)} # Account for scale_shape maybe being the result of rounding while downscaling base_shape rounded = {a: float(round(f)) for a, f in relative_factors.items()} @@ -165,7 +97,7 @@ def _scale_shapes_to_factors( def _match_or_create_scalings( - input_scales: multiscaleStore.Multiscales, export_shape: TaggedShape, chunk_shape, compute_downscales: bool + input_scales: multiscaleStore.Multiscales, export_shape: TaggedShape ) -> Tuple[ScalingsByScaleKey, Optional[ScalingsByScaleKey]]: """ Determine scale keys and scaling factors for export. 
@@ -174,7 +106,7 @@ def _match_or_create_scalings( """ if input_scales: # Source image is already multiscale, match its scales - filtered_input_scales = _get_input_multiscales_matching_export(input_scales, export_shape, compute_downscales) + filtered_input_scales = _get_input_multiscale_matching_export(input_scales, export_shape) factors_relative_to_export = _scale_shapes_to_factors(filtered_input_scales, export_shape, export_shape.keys()) scalings_relative_to_export = ODict(zip(filtered_input_scales.keys(), factors_relative_to_export)) # Factors relative to raw scale are used later to provide correct scaling metadata @@ -183,7 +115,7 @@ def _match_or_create_scalings( scalings_relative_to_raw = ODict(zip(filtered_input_scales.keys(), factors_relative_to_raw)) else: # Compute new scale levels - factors = _compute_new_scaling_factors(export_shape, chunk_shape, compute_downscales) + factors = [ODict([(a, 1.0) for a in export_shape.keys()])] scalings_relative_to_export = ODict(zip([f"s{i}" for i in range(len(factors))], factors)) scalings_relative_to_raw = None return scalings_relative_to_export, scalings_relative_to_raw @@ -204,30 +136,19 @@ def _create_empty_zarrays( meta = ODict() for scale_key, scaling in output_scalings.items(): scale_path = f"{internal_path}/{scale_key}" if internal_path else scale_key - scaled_shape = _scale_tagged_shape(export_shape, scaling).values() zarrays[scale_key] = zarr.creation.zeros( - scaled_shape, store=store, path=scale_path, chunks=chunk_shape, dtype=export_dtype + export_shape.values(), store=store, path=scale_path, chunks=chunk_shape, dtype=export_dtype ) meta[scale_key] = ImageMetadata(scale_path, scaling, {}) return zarrays, meta -def _apply_scaling_method( - data: numpy.typing.NDArray, current_block_roi: Tuple[List[int], List[int]], scaling: OrderedScaling -) -> Tuple[numpy.typing.NDArray, Tuple[List[int], List[int]]]: - """Downscaling tbd, need to investigate whether blockwise scaling is feasible. - May have to restructure the flow instead and potentially do export blockwise, then scaling afterwards.""" - raise NotImplementedError() - - def _scale_and_write_block(scales: ScalingsByScaleKey, zarrays: OrderedDict[str, zarr.Array], roi, data): assert scales.keys() == zarrays.keys() for scale_key_, scaling_ in scales.items(): if scaling_["x"] > 1.0 or scaling_["y"] > 1.0: - logger.info(f"Scale {scale_key_}: Applying {scaling_=} to {roi=}") - scaled_data, scaled_roi = _apply_scaling_method(data, roi, scaling_) - slicing = roiToSlice(*scaled_roi) + raise NotImplementedError("Downscaling is not yet implemented.") else: slicing = roiToSlice(*roi) scaled_data = data @@ -399,12 +320,7 @@ def _write_ome_zarr_and_ilastik_metadata( _write_to_dataset_attrs(ilastik_meta, za) -def write_ome_zarr( - export_path: str, - image_source_slot: Slot, - progress_signal: OrderedSignal, - compute_downscales: bool = False, -): +def write_ome_zarr(export_path: str, image_source_slot: Slot, progress_signal: OrderedSignal): if Path(PathComponents(export_path).externalPath).exists(): raise FileExistsError( "Aborting because export path already exists. Please delete it manually if you intended to overwrite it. 
" @@ -423,9 +339,7 @@ def write_ome_zarr( input_ome_meta = reordered_source.meta.get("ome_zarr_meta") chunk_shape = _get_chunk_shape(export_shape, export_dtype) - export_scalings, scalings_relative_to_raw_input = _match_or_create_scalings( - input_scales, export_shape, chunk_shape, compute_downscales - ) + export_scalings, scalings_relative_to_raw_input = _match_or_create_scalings(input_scales, export_shape) zarrays, export_meta = _create_empty_zarrays( export_path, export_dtype, chunk_shape, export_shape, export_scalings ) diff --git a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py index 3f37fcc83..84c0a1503 100644 --- a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py +++ b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py @@ -11,7 +11,7 @@ from lazyflow.roi import roiToSlice from lazyflow.utility.io_util import multiscaleStore from lazyflow.utility.io_util.OMEZarrStore import OMEZarrMultiscaleMeta -from lazyflow.utility.io_util.write_ome_zarr import write_ome_zarr, _compute_new_scaling_factors, _apply_scaling_method +from lazyflow.utility.io_util.write_ome_zarr import write_ome_zarr @pytest.mark.parametrize( @@ -32,7 +32,7 @@ def test_metadata_integrity(tmp_path, graph, shape, axes): source_op.Input.setValue(data_array) progress = mock.Mock() - write_ome_zarr(str(export_path), source_op.Output, progress, compute_downscales=True) + write_ome_zarr(str(export_path), source_op.Output, progress) expected_axiskeys = "tczyx" assert export_path.exists() @@ -68,6 +68,7 @@ def test_metadata_integrity(tmp_path, graph, shape, axes): assert all([key in discovered_keys for key in group.keys()]), "store contains undocumented subpaths" +@pytest.mark.skip("To be implemented after releasing single-scale export") @pytest.mark.parametrize( "data_shape,scaling_on", [ @@ -201,7 +202,7 @@ def tiny_5d_vigra_array_piper(graph): def test_write_new_ome_zarr_with_name_on_disc(tmp_path, tiny_5d_vigra_array_piper): export_path = tmp_path / "test.zarr/predictions/first_attempt" progress = mock.Mock() - write_ome_zarr(str(export_path), tiny_5d_vigra_array_piper.Output, progress, compute_downscales=True) + write_ome_zarr(str(export_path), tiny_5d_vigra_array_piper.Output, progress) assert export_path.exists() group = zarr.open(str(tmp_path / "test.zarr")) @@ -222,10 +223,10 @@ def test_do_not_overwrite(tmp_path, tiny_5d_vigra_array_piper): export_path = tmp_path / "test.zarr" source_op = tiny_5d_vigra_array_piper progress = mock.Mock() - write_ome_zarr(str(export_path), source_op.Output, progress, compute_downscales=True) + write_ome_zarr(str(export_path), source_op.Output, progress) with pytest.raises(FileExistsError): - write_ome_zarr(str(export_path / "copy"), source_op.Output, progress, compute_downscales=True) + write_ome_zarr(str(export_path / "copy"), source_op.Output, progress) group = zarr.open(str(export_path)) assert "copy" not in group, "should not append to existing store" m = group.attrs["multiscales"][0] @@ -233,7 +234,7 @@ def test_do_not_overwrite(tmp_path, tiny_5d_vigra_array_piper): source_op.Input.setValue(data_array2) with pytest.raises(FileExistsError): - write_ome_zarr(str(export_path), source_op.Output, progress, compute_downscales=True) + write_ome_zarr(str(export_path), source_op.Output, progress) # should not overwrite existing array numpy.testing.assert_array_equal(group["s0"], original_data_array) @@ -258,7 +259,7 @@ def 
test_match_input_scale_key_and_factors(tmp_path, tiny_5d_vigra_array_piper): # Exported array is 5d, so 5 scaling entries expected even though source multiscales to match are 4d expected_matching_scale_transform = [{"type": "scale", "scale": [1.0, 1.0, 3.0, 3.0, 3.0]}] - write_ome_zarr(str(export_path), source_op.Output, progress, compute_downscales=False) + write_ome_zarr(str(export_path), source_op.Output, progress) group = zarr.open(str(store_path)) assert "multiscales" in group.attrs @@ -327,7 +328,7 @@ def test_port_ome_zarr_metadata_from_input(tmp_path, tiny_5d_vigra_array_piper): } ) - write_ome_zarr(str(export_path), source_op.Output, progress, compute_downscales=False) + write_ome_zarr(str(export_path), source_op.Output, progress) group = zarr.open(str(store_path)) assert "multiscales" in group.attrs From 8be94d7b894711e8e07b7af9af92d8d27be15d29 Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Tue, 8 Oct 2024 18:01:10 +0200 Subject: [PATCH 30/35] OME-Zarr export review: Remove internal path logic Within an OME-Zarr multiscales store, internal paths and name have no use-case. See comments on PR volumina/316 --- .../operators/ioOperators/opExportSlot.py | 1 - lazyflow/utility/io_util/write_ome_zarr.py | 37 ++++++++--------- .../test_ioOperators/testOpExportSlot.py | 17 +++----- .../test_io_util/test_write_ome_zarr.py | 40 +++++-------------- 4 files changed, 32 insertions(+), 63 deletions(-) diff --git a/lazyflow/operators/ioOperators/opExportSlot.py b/lazyflow/operators/ioOperators/opExportSlot.py index 7c00dbdd5..3c886075f 100644 --- a/lazyflow/operators/ioOperators/opExportSlot.py +++ b/lazyflow/operators/ioOperators/opExportSlot.py @@ -153,7 +153,6 @@ def _executeExportPath(self, result): "compressed hdf5", "n5", "compressed n5", - "single-scale OME-Zarr", ) if self.OutputFormat.value in hierarchical_formats and self.OutputInternalPath.value != "": path_format += "/" + self.OutputInternalPath.value diff --git a/lazyflow/utility/io_util/write_ome_zarr.py b/lazyflow/utility/io_util/write_ome_zarr.py index 2b752d08d..7b387b34e 100644 --- a/lazyflow/utility/io_util/write_ome_zarr.py +++ b/lazyflow/utility/io_util/write_ome_zarr.py @@ -122,24 +122,20 @@ def _match_or_create_scalings( def _create_empty_zarrays( - export_path: str, + abs_export_path: str, export_dtype, chunk_shape: Shape, export_shape: TaggedShape, output_scalings: ScalingsByScaleKey, -) -> Tuple[OrderedDict[str, zarr.Array], OrderedDict[str, ImageMetadata]]: - pc = PathComponents(export_path) - external_path = pc.externalPath - internal_path = pc.internalPath.lstrip("/") if pc.internalPath else None - store = FSStore(external_path, mode="w", **OME_ZARR_V_0_4_KWARGS) +) -> Tuple[OrderedDict[str, zarr.Array], OrderedDict[str, ImageMetadata]]: # + store = FSStore(abs_export_path, mode="w", **OME_ZARR_V_0_4_KWARGS) zarrays = ODict() meta = ODict() for scale_key, scaling in output_scalings.items(): - scale_path = f"{internal_path}/{scale_key}" if internal_path else scale_key zarrays[scale_key] = zarr.creation.zeros( - export_shape.values(), store=store, path=scale_path, chunks=chunk_shape, dtype=export_dtype + export_shape.values(), store=store, path=scale_key, chunks=chunk_shape, dtype=export_dtype ) - meta[scale_key] = ImageMetadata(scale_path, scaling, {}) + meta[scale_key] = ImageMetadata(scale_key, scaling, {}) return zarrays, meta @@ -276,15 +272,12 @@ def _get_datasets_meta( def _write_ome_zarr_and_ilastik_metadata( - export_path: str, + abs_export_path: str, export_meta: 
OrderedDict[str, ImageMetadata], scalings_relative_to_raw_input: Optional[ScalingsByScaleKey], input_ome_meta: Optional[OMEZarrMultiscaleMeta], ilastik_meta: Dict, ): - pc = PathComponents(export_path) - external_path = pc.externalPath - multiscale_name = pc.internalPath.lstrip("/") if pc.internalPath else None ilastik_signature = {"name": "ilastik", "version": ilastik_version, "ome_zarr_exporter_version": 1} export_axiskeys = [tag.key for tag in ilastik_meta["axistags"]] @@ -293,8 +286,6 @@ def _write_ome_zarr_and_ilastik_metadata( ome_zarr_multiscale_meta = {"axes": axes, "datasets": datasets, "version": "0.4"} # Optional fields - if multiscale_name: - ome_zarr_multiscale_meta["name"] = multiscale_name if input_ome_meta and input_ome_meta.multiscale_transformations: transforms_axis_matched = [] for transform in input_ome_meta.multiscale_transformations: @@ -311,7 +302,7 @@ def _write_ome_zarr_and_ilastik_metadata( ) ome_zarr_multiscale_meta["coordinateTransformations"] = transforms_axis_matched - store = FSStore(external_path, mode="w", **OME_ZARR_V_0_4_KWARGS) + store = FSStore(abs_export_path, mode="w", **OME_ZARR_V_0_4_KWARGS) root = zarr.group(store, overwrite=False) root.attrs["_creator"] = ilastik_signature root.attrs["multiscales"] = [ome_zarr_multiscale_meta] @@ -321,11 +312,17 @@ def _write_ome_zarr_and_ilastik_metadata( def write_ome_zarr(export_path: str, image_source_slot: Slot, progress_signal: OrderedSignal): - if Path(PathComponents(export_path).externalPath).exists(): + pc = PathComponents(export_path) + if pc.internalPath: + raise ValueError( + f'Internal paths are not supported by OME-Zarr export. Received internal path: "{pc.internalPath}"' + ) + abs_export_path = pc.externalPath + if Path(abs_export_path).exists(): raise FileExistsError( "Aborting because export path already exists. Please delete it manually if you intended to overwrite it. " "Appending to an existing OME-Zarr store is not yet implemented." - f"\nPath: {PathComponents(export_path).externalPath}." + f"\nPath: {abs_export_path}." 
) op_reorder = OpReorderAxes(parent=image_source_slot.operator) op_reorder.AxisOrder.setValue("tczyx") @@ -341,7 +338,7 @@ def write_ome_zarr(export_path: str, image_source_slot: Slot, progress_signal: O chunk_shape = _get_chunk_shape(export_shape, export_dtype) export_scalings, scalings_relative_to_raw_input = _match_or_create_scalings(input_scales, export_shape) zarrays, export_meta = _create_empty_zarrays( - export_path, export_dtype, chunk_shape, export_shape, export_scalings + abs_export_path, export_dtype, chunk_shape, export_shape, export_scalings ) requester = BigRequestStreamer(reordered_source, roiFromShape(reordered_source.meta.shape)) @@ -351,7 +348,7 @@ def write_ome_zarr(export_path: str, image_source_slot: Slot, progress_signal: O progress_signal(95) _write_ome_zarr_and_ilastik_metadata( - export_path, + abs_export_path, export_meta, scalings_relative_to_raw_input, input_ome_meta, diff --git a/tests/test_lazyflow/test_operators/test_ioOperators/testOpExportSlot.py b/tests/test_lazyflow/test_operators/test_ioOperators/testOpExportSlot.py index 7f24db25f..6e060dd4b 100644 --- a/tests/test_lazyflow/test_operators/test_ioOperators/testOpExportSlot.py +++ b/tests/test_lazyflow/test_operators/test_ioOperators/testOpExportSlot.py @@ -81,7 +81,7 @@ def testBasic_Hdf5(self): finally: opRead.cleanUp() - def test_ome_zarr_without_internal_path(self): + def test_ome_zarr(self): data = numpy.random.random((90, 100)).astype(numpy.float32) data = vigra.taggedView(data, vigra.defaultAxistags("yx")) @@ -93,14 +93,12 @@ def test_ome_zarr_without_internal_path(self): opExport.Input.connect(opPiper.Output) opExport.OutputFormat.setValue("single-scale OME-Zarr") opExport.OutputFilenameFormat.setValue(self._tmpdir + "/test_export_x{x_start}-{x_stop}_y{y_start}-{y_stop}") - opExport.OutputInternalPath.setValue("") # Overwrite the slot's default "exported_data" opExport.CoordinateOffset.setValue((10, 20)) assert opExport.ExportPath.ready() - export_path_components = PathComponents(opExport.ExportPath.value) expected_export_path = Path(self._tmpdir) / "test_export_x20-120_y10-100.zarr" - assert Path(export_path_components.externalPath) == expected_export_path - assert export_path_components.internalPath is None + assert Path(opExport.ExportPath.value) == expected_export_path + opExport.run_export() opRead = OpInputDataReader(graph=graph) @@ -147,8 +145,7 @@ def test_ome_zarr_roundtrip(self): } ] # Expected written meta is the same as input, but tczyx, only with the respective scale, - # and with "exported_data" as the name (internal path is mandatory due to - # OpExportData.OutputInternalPath having default="exported_data") + # and with no name expected_meta_s0 = [ { "axes": [ @@ -168,10 +165,9 @@ def test_ome_zarr_roundtrip(self): {"scale": [1.0, 1.0, 1.0, 0.2, 0.2], "type": "scale"}, {"translation": [0.0, 0.0, 0.0, 0.0, 0.0], "type": "translation"}, ], - "path": "exported_data/s0", + "path": "s0", } ], - "name": "exported_data", "version": "0.4", } ] @@ -194,10 +190,9 @@ def test_ome_zarr_roundtrip(self): {"scale": [1.0, 1.0, 1.0, 1.4, 1.4], "type": "scale"}, {"translation": [0.0, 0.0, 0.0, 7.62, 8.49], "type": "translation"}, ], - "path": "exported_data/s1", + "path": "s1", } ], - "name": "exported_data", "version": "0.4", } ] diff --git a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py index 84c0a1503..1c1c7f139 100644 --- a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py +++ 
b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py @@ -199,23 +199,6 @@ def tiny_5d_vigra_array_piper(graph): return op -def test_write_new_ome_zarr_with_name_on_disc(tmp_path, tiny_5d_vigra_array_piper): - export_path = tmp_path / "test.zarr/predictions/first_attempt" - progress = mock.Mock() - write_ome_zarr(str(export_path), tiny_5d_vigra_array_piper.Output, progress) - - assert export_path.exists() - group = zarr.open(str(tmp_path / "test.zarr")) - assert "multiscales" in group.attrs - m = group.attrs["multiscales"][0] - assert all(key in m for key in ("datasets", "axes", "version", "name")) - assert m["version"] == "0.4" - assert m["name"] == "predictions/first_attempt" - assert [a["name"] for a in m["axes"]] == ["t", "c", "z", "y", "x"] - assert all(dataset["path"] in group for dataset in m["datasets"]) - assert all(dataset["path"][0] != "/" in group for dataset in m["datasets"]) - - def test_do_not_overwrite(tmp_path, tiny_5d_vigra_array_piper): original_data_array = tiny_5d_vigra_array_piper.Output.value data_array2 = vigra.VigraArray((1, 1, 3, 3, 3), axistags=vigra.defaultAxistags("tczyx")) @@ -226,24 +209,20 @@ def test_do_not_overwrite(tmp_path, tiny_5d_vigra_array_piper): write_ome_zarr(str(export_path), source_op.Output, progress) with pytest.raises(FileExistsError): - write_ome_zarr(str(export_path / "copy"), source_op.Output, progress) - group = zarr.open(str(export_path)) - assert "copy" not in group, "should not append to existing store" - m = group.attrs["multiscales"][0] - assert m["datasets"][0]["path"] == "s0" + write_ome_zarr(str(export_path), source_op.Output, progress) source_op.Input.setValue(data_array2) with pytest.raises(FileExistsError): write_ome_zarr(str(export_path), source_op.Output, progress) # should not overwrite existing array + group = zarr.open(str(export_path)) numpy.testing.assert_array_equal(group["s0"], original_data_array) def test_match_input_scale_key_and_factors(tmp_path, tiny_5d_vigra_array_piper): """If the source slot has scale metadata, the export should match the scale name to the input. 
Scaling metadata should be relative to the input's raw data.""" - store_path = tmp_path / "test.zarr" - export_path = store_path / "subdir" + export_path = tmp_path / "test.zarr" source_op = tiny_5d_vigra_array_piper progress = mock.Mock() input_axes = ["t", "z", "y", "x"] @@ -261,12 +240,12 @@ def test_match_input_scale_key_and_factors(tmp_path, tiny_5d_vigra_array_piper): write_ome_zarr(str(export_path), source_op.Output, progress) - group = zarr.open(str(store_path)) + group = zarr.open(str(export_path)) assert "multiscales" in group.attrs m = group.attrs["multiscales"][0] assert "datasets" in m and "path" in m["datasets"][0] assert len(m["datasets"]) == 1 - assert m["datasets"][0]["path"] == "subdir/matching_scale" + assert m["datasets"][0]["path"] == "matching_scale" assert m["datasets"][0]["coordinateTransformations"] == expected_matching_scale_transform @@ -275,8 +254,7 @@ def test_port_ome_zarr_metadata_from_input(tmp_path, tiny_5d_vigra_array_piper): If there is OME-Zarr specific additional metadata (even unused in ilastik), the export should write metadata that describe the pyramid as a whole, and those that describe the written scale.""" - store_path = tmp_path / "test.zarr" - export_path = store_path / "subdir" + export_path = tmp_path / "test.zarr" source_op = tiny_5d_vigra_array_piper progress = mock.Mock() multiscales: multiscaleStore.Multiscales = OrderedDict( @@ -330,12 +308,12 @@ def test_port_ome_zarr_metadata_from_input(tmp_path, tiny_5d_vigra_array_piper): write_ome_zarr(str(export_path), source_op.Output, progress) - group = zarr.open(str(store_path)) + group = zarr.open(str(export_path)) assert "multiscales" in group.attrs m = group.attrs["multiscales"][0] assert "datasets" in m and "path" in m["datasets"][0] assert len(m["datasets"]) == 1 - assert m["name"] == "subdir" # Input name should not be carried over - presumably it names the raw data + assert "name" not in m # Input name should not be carried over - presumably it names the raw data assert m["axes"] == [ {"name": "t", "type": "time", "unit": "second"}, {"name": "c", "type": "channel"}, @@ -344,5 +322,5 @@ def test_port_ome_zarr_metadata_from_input(tmp_path, tiny_5d_vigra_array_piper): {"name": "x", "type": "space", "unit": "micrometer"}, ] # Axis units should be carried over assert m["coordinateTransformations"] == expected_multiscale_transform - assert m["datasets"][0]["path"] == "subdir/matching_scale" + assert m["datasets"][0]["path"] == "matching_scale" assert m["datasets"][0]["coordinateTransformations"] == expected_matching_scale_transform From 2b19a02a5697c0ec63dc5f4fc1b526477682b858 Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Wed, 9 Oct 2024 16:43:58 +0200 Subject: [PATCH 31/35] OME-Zarr export: Smaller review points * Add note about scale paths * Add license * Remove unused import * Rephrase scale list comment Co-authored-by: Dominik Kutra --- .../datasetDetailedInfoTableModel.py | 4 ++-- lazyflow/utility/io_util/OMEZarrStore.py | 5 +++- lazyflow/utility/io_util/write_ome_zarr.py | 23 ++++++++++++++++++- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/ilastik/applets/dataSelection/datasetDetailedInfoTableModel.py b/ilastik/applets/dataSelection/datasetDetailedInfoTableModel.py index a2a372eb1..0e671661c 100644 --- a/ilastik/applets/dataSelection/datasetDetailedInfoTableModel.py +++ b/ilastik/applets/dataSelection/datasetDetailedInfoTableModel.py @@ -223,9 +223,9 @@ def get_scale_options(self, laneIndex) -> Dict[str, str]: datasetInfo = 
datasetSlot.value if not datasetInfo.scales: return {} - # Reverse the scale list: # Multiscale datasets always list scales from original (largest) to most-downscaled (smallest). - # We want to display them in the opposite order. + # We display them in reverse order so that the default loaded scale (the smallest) + # is the first option in the drop-down box return { key: _dims_to_display_string(tagged_shape, datasetInfo.axiskeys) for key, tagged_shape in reversed(datasetInfo.scales.items()) diff --git a/lazyflow/utility/io_util/OMEZarrStore.py b/lazyflow/utility/io_util/OMEZarrStore.py index c59f970d6..d21c70baa 100644 --- a/lazyflow/utility/io_util/OMEZarrStore.py +++ b/lazyflow/utility/io_util/OMEZarrStore.py @@ -127,7 +127,10 @@ def _get_zarr_cache_max_size() -> int: return math.floor(caches_max * permissible_fraction_max) -def scale_key_from_path(scale_path): +def scale_key_from_path(scale_path: str): + """Paths in this context are web-paths, i.e. URI components. + Backslashes would technically be valid characters in scale keys. + Please make sure not to accidentally pass Windows paths here...""" return scale_path.split("/")[-1] diff --git a/lazyflow/utility/io_util/write_ome_zarr.py b/lazyflow/utility/io_util/write_ome_zarr.py index 7b387b34e..8bbc7d712 100644 --- a/lazyflow/utility/io_util/write_ome_zarr.py +++ b/lazyflow/utility/io_util/write_ome_zarr.py @@ -1,3 +1,24 @@ +############################################################################### +# lazyflow: data flow based lazy parallel computation framework +# +# Copyright (C) 2011-2024, the ilastik developers +# +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the Lesser GNU General Public License +# as published by the Free Software Foundation; either version 2.1 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# See the files LICENSE.lgpl2 and LICENSE.lgpl3 for full text of the +# GNU Lesser General Public License version 2.1 and 3 respectively. 
+# This information is also available on the ilastik web site at: +# http://ilastik.org/license/ +############################################################################### import dataclasses import logging from collections import OrderedDict as ODict @@ -7,7 +28,7 @@ import numpy import zarr -from zarr.storage import FSStore, contains_array +from zarr.storage import FSStore from ilastik import __version__ as ilastik_version from lazyflow.operators import OpReorderAxes From a36a463bc4da559d8585af8d5ada2f9c07240d02 Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Wed, 9 Oct 2024 16:58:10 +0200 Subject: [PATCH 32/35] OME-Zarr export review: Use Pipeline for roundtrip test Co-authored-by: Dominik Kutra --- .../test_ioOperators/testOpExportSlot.py | 74 ++++++------------- 1 file changed, 24 insertions(+), 50 deletions(-) diff --git a/tests/test_lazyflow/test_operators/test_ioOperators/testOpExportSlot.py b/tests/test_lazyflow/test_operators/test_ioOperators/testOpExportSlot.py index 6e060dd4b..322e9c505 100644 --- a/tests/test_lazyflow/test_operators/test_ioOperators/testOpExportSlot.py +++ b/tests/test_lazyflow/test_operators/test_ioOperators/testOpExportSlot.py @@ -1,9 +1,7 @@ -from builtins import object - ############################################################################### # lazyflow: data flow based lazy parallel computation framework # -# Copyright (C) 2011-2014, the ilastik developers +# Copyright (C) 2011-2024, the ilastik developers # # # This program is free software; you can redistribute it and/or @@ -24,7 +22,6 @@ import os import tempfile import shutil -import platform from pathlib import Path import numpy @@ -33,8 +30,7 @@ from lazyflow.graph import Graph from lazyflow.operator import Operator -from lazyflow.utility import PathComponents -from lazyflow.roi import roiFromShape +from lazyflow.utility import PathComponents, Pipeline from lazyflow.operators.operators import OpArrayPiper from lazyflow.operators import OpBlockedArrayCache from lazyflow.operators.opReorderAxes import OpReorderAxes @@ -197,76 +193,54 @@ def test_ome_zarr_roundtrip(self): } ] - path_in = self._tmpdir + "/input.zarr" - file = z5py.ZarrFile(path_in, "w") + input_path = self._tmpdir + "/input.zarr" + file = z5py.ZarrFile(input_path, "w") data = numpy.random.random((89, 99)).astype(numpy.float32) downscale = data[::7, ::7] file.create_dataset("s0", data=data) file.create_dataset("s1", data=downscale) file.attrs["multiscales"] = input_meta - graph = Graph() # Raw scale first - opRead = OpInputDataReader(graph=graph) - opExport = OpExportSlot(graph=graph) - try: - opRead.FilePath.setValue(path_in + "/s0") - - export_path = self._tmpdir + "/test_export1.zarr" - opExport.Input.connect(opRead.Output) - opExport.OutputFormat.setValue("single-scale OME-Zarr") - opExport.OutputFilenameFormat.setValue(export_path) + export_path = self._tmpdir + "/test_export1.zarr" + with Pipeline(graph=Graph()) as pipeline: + pipeline.add(OpInputDataReader, FilePath=input_path + "/s0") + opExport = pipeline.add( + OpExportSlot, OutputFormat="single-scale OME-Zarr", OutputFilenameFormat=export_path + ) opExport.run_export() assert os.path.exists(export_path) written_file = z5py.ZarrFile(export_path, "r") assert written_file.attrs["multiscales"] == expected_meta_s0 - finally: - opExport.cleanUp() - opRead.cleanUp() # Same thing for the second scale - # Have to make new ops because they aren't "recyclable" after a cleanUp - opRead = OpInputDataReader(graph=graph) - opExport = 
OpExportSlot(graph=graph) - try: - opRead.FilePath.setValue(path_in + "/s1") - - export_path = self._tmpdir + "/test_export2.zarr" - opExport.Input.connect(opRead.Output) - opExport.OutputFormat.setValue("single-scale OME-Zarr") - opExport.OutputFilenameFormat.setValue(export_path) + export_path = self._tmpdir + "/test_export2.zarr" + with Pipeline(graph=Graph()) as pipeline: + pipeline.add(OpInputDataReader, FilePath=input_path + "/s1") + opExport = pipeline.add( + OpExportSlot, OutputFormat="single-scale OME-Zarr", OutputFilenameFormat=export_path + ) opExport.run_export() assert os.path.exists(export_path) written_file = z5py.ZarrFile(export_path, "r") assert written_file.attrs["multiscales"] == expected_meta_s1 - finally: - opExport.cleanUp() - opRead.cleanUp() # Another time, but give path as URI to go through OMEZarrMultiscaleReader - # opRead then needs a parent to avoid the multiscale reader going into single-scale mode - noop = Operator(graph=graph) - opRead = OpInputDataReader(parent=noop) - opExport = OpExportSlot(parent=noop) - try: - opRead.FilePath.setValue(Path(path_in).as_uri()) - opRead.ActiveScale.setValue("s1") - - export_path = self._tmpdir + "/test_export3.zarr" - opExport.Input.connect(opRead.Output) - opExport.OutputFormat.setValue("single-scale OME-Zarr") - opExport.OutputFilenameFormat.setValue(export_path) + # OpInputDataReader then needs a parent to avoid the multiscale reader going into single-scale mode + noop = Operator(graph=Graph()) + export_path = self._tmpdir + "/test_export3.zarr" + with Pipeline(parent=noop) as pipeline: + pipeline.add(OpInputDataReader, FilePath=Path(input_path).as_uri(), ActiveScale="s1") + opExport = pipeline.add( + OpExportSlot, OutputFormat="single-scale OME-Zarr", OutputFilenameFormat=export_path + ) opExport.run_export() assert os.path.exists(export_path) written_file = z5py.ZarrFile(export_path, "r") assert written_file.attrs["multiscales"] == expected_meta_s1 - finally: - opExport.cleanUp() - opRead.cleanUp() - noop.cleanUp() def testBasic_Npy(self): data = numpy.random.random((100, 100)).astype(numpy.float32) From b52128034cf6b34dcd69b869d1d59177b1723a2c Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Fri, 11 Oct 2024 15:53:10 +0200 Subject: [PATCH 33/35] OME-Zarr export: Account for subregion export I.e. the user chose to export only a crop of the full output. This means that if the input was multiscale, the export's shape cannot be used to infer which scale in the input it came from -> have to pass the active_scale through from the reader ops. Additionally, if the crop does not start at 0 along any axis, CoordinateOffset will record the position relative to the full output. This corresponds to a translation-type coordinateTransformation in OME-Zarr. 
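A minimal sketch of the offset arithmetic this patch introduces, using the values from the updated
test_port_ome_zarr_metadata_from_input below (the variable names scale, input_translation and
pixel_offset are illustrative only, not part of the patch):

# Hypothetical illustration: a pixel crop offset becomes an OME-Zarr "translation"
# by converting it to physical units (offset * scale) and adding the input's translation.
from collections import OrderedDict

scale = OrderedDict([("t", 1.0), ("c", 1.0), ("z", 2.0), ("y", 2.0), ("x", 2.0)])  # absolute scaling of the exported scale
input_translation = OrderedDict([("t", 0.1), ("c", 0.0), ("z", 3.2), ("y", 1.0), ("x", 1.0)])  # from the input dataset's metadata
pixel_offset = OrderedDict([("t", 0), ("c", 0), ("z", 4), ("y", 4), ("x", 4)])  # the user's crop start (CoordinateOffset)

translation = OrderedDict((a, input_translation[a] + pixel_offset[a] * scale[a]) for a in scale)
# -> approximately {"t": 0.1, "c": 0.0, "z": 11.2, "y": 9.0, "x": 9.0},
#    the translation values the updated test below expects.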
--- .../operators/ioOperators/opExportSlot.py | 3 +- .../ioOperators/opOMEZarrMultiscaleReader.py | 1 + ...opRESTfulPrecomputedChunkedVolumeReader.py | 1 + .../ioOperators/opStreamingH5N5Reader.py | 1 + lazyflow/utility/io_util/write_ome_zarr.py | 94 ++++++++++++++----- .../test_ioOperators/testOpExportSlot.py | 9 ++ .../test_io_util/test_write_ome_zarr.py | 30 +++--- 7 files changed, 106 insertions(+), 33 deletions(-) diff --git a/lazyflow/operators/ioOperators/opExportSlot.py b/lazyflow/operators/ioOperators/opExportSlot.py index 3c886075f..90b01854c 100644 --- a/lazyflow/operators/ioOperators/opExportSlot.py +++ b/lazyflow/operators/ioOperators/opExportSlot.py @@ -412,8 +412,9 @@ def _export_multipage_tiff_sequence(self): def _export_ome_zarr(self): self.progressSignal(0) + offset_meta = self.CoordinateOffset.value if self.CoordinateOffset.ready() else None try: - write_ome_zarr(self.ExportPath.value, self.Input, self.progressSignal) + write_ome_zarr(self.ExportPath.value, self.Input, offset_meta, self.progressSignal) finally: self.progressSignal(100) diff --git a/lazyflow/operators/ioOperators/opOMEZarrMultiscaleReader.py b/lazyflow/operators/ioOperators/opOMEZarrMultiscaleReader.py index 4fffb3435..a57b3e514 100644 --- a/lazyflow/operators/ioOperators/opOMEZarrMultiscaleReader.py +++ b/lazyflow/operators/ioOperators/opOMEZarrMultiscaleReader.py @@ -59,6 +59,7 @@ def setupOutputs(self): self.Output.meta.dtype = self._store.dtype self.Output.meta.axistags = self._store.axistags self.Output.meta.scales = self._store.multiscales + self.Output.meta.active_scale = active_scale # Used by export to correlate export with input scale # To feed back to DatasetInfo and hence the project file self.Output.meta.lowest_scale = self._store.lowest_resolution_key # Many public OME-Zarr datasets are chunked as full xy slices, diff --git a/lazyflow/operators/ioOperators/opRESTfulPrecomputedChunkedVolumeReader.py b/lazyflow/operators/ioOperators/opRESTfulPrecomputedChunkedVolumeReader.py index e8b8281f2..71e3bc450 100644 --- a/lazyflow/operators/ioOperators/opRESTfulPrecomputedChunkedVolumeReader.py +++ b/lazyflow/operators/ioOperators/opRESTfulPrecomputedChunkedVolumeReader.py @@ -63,6 +63,7 @@ def setupOutputs(self): self.Output.meta.dtype = numpy.dtype(self._volume_object.dtype).type self.Output.meta.axistags = self._volume_object.axistags self.Output.meta.scales = self._volume_object.multiscales + self.Output.meta.active_scale = active_scale # Used by export to correlate export with input scale # To feed back to DatasetInfo and hence the project file self.Output.meta.lowest_scale = self._volume_object.lowest_resolution_key diff --git a/lazyflow/operators/ioOperators/opStreamingH5N5Reader.py b/lazyflow/operators/ioOperators/opStreamingH5N5Reader.py index 8efe2d19d..6552c2d87 100644 --- a/lazyflow/operators/ioOperators/opStreamingH5N5Reader.py +++ b/lazyflow/operators/ioOperators/opStreamingH5N5Reader.py @@ -168,6 +168,7 @@ def setupOutputs(self): ] scales: Multiscales = OrderedDict(zip(scale_keys, scale_tagged_shapes)) self.OutputImage.meta.scales = scales + self.OutputImage.meta.active_scale = scale_key_from_path(internalPath) self.OutputImage.meta.lowest_scale = scale_keys[-1] self.OutputImage.meta.ome_zarr_meta = OMEZarrMultiscaleMeta.from_multiscale_spec(multiscale_spec) diff --git a/lazyflow/utility/io_util/write_ome_zarr.py b/lazyflow/utility/io_util/write_ome_zarr.py index 8bbc7d712..5050605a0 100644 --- a/lazyflow/utility/io_util/write_ome_zarr.py +++ b/lazyflow/utility/io_util/write_ome_zarr.py 
@@ -46,7 +46,7 @@ Shape = Tuple[int, ...] TaggedShape = OrderedDict[str, int] # { axis: size } -OrderedScaling = OrderedDict[str, float] # { axis: scaling } +OrderedScaling = OrderedTranslation = OrderedDict[str, float] # { axis: scaling } ScalingsByScaleKey = OrderedDict[str, OrderedScaling] # { scale_key: { axis: scaling } } SPATIAL_AXES = ["z", "y", "x"] @@ -56,7 +56,7 @@ class ImageMetadata: path: str scaling: OrderedScaling - translation: Dict[str, float] + translation: OrderedTranslation def _get_chunk_shape(tagged_image_shape: TaggedShape, dtype) -> Shape: @@ -73,20 +73,20 @@ def _get_chunk_shape(tagged_image_shape: TaggedShape, dtype) -> Shape: def _get_input_multiscale_matching_export( - input_scales: multiscaleStore.Multiscales, export_shape: TaggedShape + input_scales: multiscaleStore.Multiscales, input_scale_key: str ) -> multiscaleStore.Multiscales: """Filter for multiscales entry that matches source image.""" matching_scales = [] # Multiscales is ordered from highest to lowest resolution for key, scale_shape in input_scales.items(): - if all(scale_shape[a] == export_shape[a] or a == "c" for a in scale_shape.keys()): + if key == input_scale_key: matching_scales.append((key, scale_shape)) break assert len(matching_scales) > 0, "Should be impossible, input must be one of the scales" return ODict(matching_scales) -def _scale_shapes_to_factors( +def _multiscale_shapes_to_factors( multiscales: multiscaleStore.Multiscales, base_shape: TaggedShape, output_axiskeys: List[Literal["t", "c", "z", "y", "x"]], @@ -118,7 +118,7 @@ def _scale_shapes_to_factors( def _match_or_create_scalings( - input_scales: multiscaleStore.Multiscales, export_shape: TaggedShape + input_scales: multiscaleStore.Multiscales, input_scale_key: str, export_shape: TaggedShape ) -> Tuple[ScalingsByScaleKey, Optional[ScalingsByScaleKey]]: """ Determine scale keys and scaling factors for export. @@ -127,12 +127,17 @@ def _match_or_create_scalings( """ if input_scales: # Source image is already multiscale, match its scales - filtered_input_scales = _get_input_multiscale_matching_export(input_scales, export_shape) - factors_relative_to_export = _scale_shapes_to_factors(filtered_input_scales, export_shape, export_shape.keys()) + filtered_input_scales = _get_input_multiscale_matching_export(input_scales, input_scale_key) + # The export might be a crop of the source scale it corresponds to (the first one in the filtered list). + # Need the full shape of that scale as the base for scaling factors. 
+ base_shape = next(iter(filtered_input_scales.values())) + factors_relative_to_export = _multiscale_shapes_to_factors( + filtered_input_scales, base_shape, export_shape.keys() + ) scalings_relative_to_export = ODict(zip(filtered_input_scales.keys(), factors_relative_to_export)) # Factors relative to raw scale are used later to provide correct scaling metadata raw_shape = next(iter(input_scales.values())) - factors_relative_to_raw = _scale_shapes_to_factors(filtered_input_scales, raw_shape, export_shape.keys()) + factors_relative_to_raw = _multiscale_shapes_to_factors(filtered_input_scales, raw_shape, export_shape.keys()) scalings_relative_to_raw = ODict(zip(filtered_input_scales.keys(), factors_relative_to_raw)) else: # Compute new scale levels @@ -202,13 +207,13 @@ def _get_input_dataset_transformations( def _update_export_scaling_from_input( absolute_scaling: OrderedScaling, - input_ome_meta: Optional[OMEZarrMultiscaleMeta], + input_axiskeys: Optional[List[Literal["t", "c", "z", "y", "x"]]], input_scale: Optional[OMEZarrCoordinateTransformation], scale_key: str, ) -> OrderedScaling: if not input_scale or "scale" not in input_scale: return absolute_scaling - input_scaling = ODict(zip(input_ome_meta.axis_units.keys(), input_scale["scale"])) + input_scaling = ODict(zip(input_axiskeys, input_scale["scale"])) if any([input_scaling[a] != absolute_scaling[a] for a in SPATIAL_AXES if a in input_scaling]): # This shouldn't happen logger.warning( @@ -255,10 +260,44 @@ def _get_axes_meta(export_axiskeys, input_ome_meta): return axes +def _get_total_offset( + absolute_scaling: OrderedScaling, + image: ImageMetadata, + export_offset: TaggedShape, + input_axiskeys: Optional[List[Literal["t", "c", "z", "y", "x"]]], + input_translation: Optional[OMEZarrCoordinateTransformation], +) -> OrderedTranslation: + # Translation may be a total of scale offset, export offset, and input offset (depending on availability) + export_axiskeys = list(absolute_scaling.keys()) + noop_translation: OrderedTranslation = ODict(zip(export_axiskeys, [0.0] * len(export_axiskeys))) + base_translation = image.translation if image.translation else noop_translation.copy() + reordered_export_offset = noop_translation.copy() + reordered_input_translation = noop_translation.copy() + if export_offset: + # offset may still have arbitrary axes here + # multiply by absolute scaling to obtain physical units if possible (which the final translation should be) + reordered_export_offset = ODict( + [(a, export_offset[a] * absolute_scaling[a] if a in export_offset else 0.0) for a in export_axiskeys] + ) + if input_translation and "translation" in input_translation: + tagged_translation = ODict(zip(input_axiskeys, input_translation["translation"])) + reordered_input_translation = ODict( + [(a, tagged_translation[a] if a in tagged_translation else 0.0) for a in export_axiskeys] + ) + combined_translation = ODict( + [ + (a, base_translation[a] + reordered_export_offset[a] + reordered_input_translation[a]) + for a in export_axiskeys + ] + ) + return combined_translation + + def _get_datasets_meta( multiscale_metadata: OrderedDict[str, ImageMetadata], input_ome_meta: Optional[OMEZarrMultiscaleMeta], scalings_relative_to_raw_input: Optional[ScalingsByScaleKey], + export_offset: Optional[TaggedShape], ): """ Dataset metadata consists of (1) path, (2) coordinate transformations (scale and translation). 
@@ -268,6 +307,7 @@ def _get_datasets_meta( """ datasets = [] raw_data_abs_scale = _get_input_raw_absolute_scaling(input_ome_meta) + input_axiskeys = input_ome_meta.axis_units.keys() if input_ome_meta else None for scale_key, image in multiscale_metadata.items(): if scalings_relative_to_raw_input and scale_key in scalings_relative_to_raw_input: relative_scaling = scalings_relative_to_raw_input[scale_key] @@ -277,17 +317,19 @@ def _get_datasets_meta( # but the input dataset might contain absolute scale values, i.e. time/pixel resolution absolute_scaling = _make_absolute_if_possible(relative_scaling, raw_data_abs_scale) input_scale, input_translation = _get_input_dataset_transformations(input_ome_meta, scale_key) - absolute_scaling = _update_export_scaling_from_input(absolute_scaling, input_ome_meta, input_scale, scale_key) + absolute_scaling = _update_export_scaling_from_input(absolute_scaling, input_axiskeys, input_scale, scale_key) dataset = { "path": image.path, "coordinateTransformations": [{"type": "scale", "scale": list(absolute_scaling.values())}], } - if input_translation and "translation" in input_translation: - tagged_translation = ODict(zip(input_ome_meta.axis_units.keys(), input_translation["translation"])) - reordered_translation = [ - tagged_translation[a] if a in tagged_translation and a != "c" else 0.0 for a in image.scaling.keys() - ] - dataset["coordinateTransformations"].append({"type": "translation", "translation": reordered_translation}) + combined_translation = _get_total_offset( + absolute_scaling, image, export_offset, input_axiskeys, input_translation + ) + # Write translation if the input had it (even if it was all 0s), or if the export offset is non-zero + if input_translation or any(t != 0 for t in combined_translation.values()): + dataset["coordinateTransformations"].append( + {"type": "translation", "translation": list(combined_translation.values())} + ) datasets.append(dataset) return datasets @@ -296,6 +338,7 @@ def _write_ome_zarr_and_ilastik_metadata( abs_export_path: str, export_meta: OrderedDict[str, ImageMetadata], scalings_relative_to_raw_input: Optional[ScalingsByScaleKey], + export_offset: Optional[TaggedShape], input_ome_meta: Optional[OMEZarrMultiscaleMeta], ilastik_meta: Dict, ): @@ -303,7 +346,7 @@ def _write_ome_zarr_and_ilastik_metadata( export_axiskeys = [tag.key for tag in ilastik_meta["axistags"]] axes = _get_axes_meta(export_axiskeys, input_ome_meta) - datasets = _get_datasets_meta(export_meta, input_ome_meta, scalings_relative_to_raw_input) + datasets = _get_datasets_meta(export_meta, input_ome_meta, scalings_relative_to_raw_input, export_offset) ome_zarr_multiscale_meta = {"axes": axes, "datasets": datasets, "version": "0.4"} # Optional fields @@ -332,7 +375,9 @@ def _write_ome_zarr_and_ilastik_metadata( _write_to_dataset_attrs(ilastik_meta, za) -def write_ome_zarr(export_path: str, image_source_slot: Slot, progress_signal: OrderedSignal): +def write_ome_zarr( + export_path: str, image_source_slot: Slot, export_offset: Optional[Shape], progress_signal: OrderedSignal +): pc = PathComponents(export_path) if pc.internalPath: raise ValueError( @@ -345,6 +390,9 @@ def write_ome_zarr(export_path: str, image_source_slot: Slot, progress_signal: O "Appending to an existing OME-Zarr store is not yet implemented." f"\nPath: {abs_export_path}." 
) + export_offset: TaggedShape = ( + ODict(zip(image_source_slot.meta.getAxisKeys(), export_offset)) if export_offset else None + ) op_reorder = OpReorderAxes(parent=image_source_slot.operator) op_reorder.AxisOrder.setValue("tczyx") try: @@ -354,10 +402,13 @@ def write_ome_zarr(export_path: str, image_source_slot: Slot, progress_signal: O export_shape = reordered_source.meta.getTaggedShape() export_dtype = reordered_source.meta.dtype input_scales = reordered_source.meta.scales if "scales" in reordered_source.meta else None + input_scale_key = reordered_source.meta.active_scale if "scales" in reordered_source.meta else None input_ome_meta = reordered_source.meta.get("ome_zarr_meta") chunk_shape = _get_chunk_shape(export_shape, export_dtype) - export_scalings, scalings_relative_to_raw_input = _match_or_create_scalings(input_scales, export_shape) + export_scalings, scalings_relative_to_raw_input = _match_or_create_scalings( + input_scales, input_scale_key, export_shape + ) zarrays, export_meta = _create_empty_zarrays( abs_export_path, export_dtype, chunk_shape, export_shape, export_scalings ) @@ -372,6 +423,7 @@ def write_ome_zarr(export_path: str, image_source_slot: Slot, progress_signal: O abs_export_path, export_meta, scalings_relative_to_raw_input, + export_offset, input_ome_meta, { "axistags": reordered_source.meta.axistags, diff --git a/tests/test_lazyflow/test_operators/test_ioOperators/testOpExportSlot.py b/tests/test_lazyflow/test_operators/test_ioOperators/testOpExportSlot.py index 322e9c505..6bd975c77 100644 --- a/tests/test_lazyflow/test_operators/test_ioOperators/testOpExportSlot.py +++ b/tests/test_lazyflow/test_operators/test_ioOperators/testOpExportSlot.py @@ -90,6 +90,10 @@ def test_ome_zarr(self): opExport.OutputFormat.setValue("single-scale OME-Zarr") opExport.OutputFilenameFormat.setValue(self._tmpdir + "/test_export_x{x_start}-{x_stop}_y{y_start}-{y_stop}") opExport.CoordinateOffset.setValue((10, 20)) + expected_transformations = [ + {"type": "scale", "scale": [1.0, 1.0, 1.0, 1.0, 1.0]}, + {"type": "translation", "translation": [0.0, 0.0, 0.0, 10.0, 20.0]}, + ] assert opExport.ExportPath.ready() expected_export_path = Path(self._tmpdir) / "test_export_x20-120_y10-100.zarr" @@ -103,6 +107,11 @@ def test_ome_zarr(self): expected_data = data.view(numpy.ndarray).reshape((1, 1, 1) + data.shape) # OME-Zarr always tczyx read_data = opRead.Output[:].wait() numpy.testing.assert_array_equal(read_data, expected_data) + written_file = z5py.ZarrFile(str(expected_export_path), "r") + assert ( + written_file.attrs["multiscales"][0]["datasets"][0]["coordinateTransformations"] + == expected_transformations + ) finally: opRead.cleanUp() diff --git a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py index 1c1c7f139..8b2db15ba 100644 --- a/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py +++ b/tests/test_lazyflow/test_utility/test_io_util/test_write_ome_zarr.py @@ -32,7 +32,7 @@ def test_metadata_integrity(tmp_path, graph, shape, axes): source_op.Input.setValue(data_array) progress = mock.Mock() - write_ome_zarr(str(export_path), source_op.Output, progress) + write_ome_zarr(str(export_path), source_op.Output, None, progress) expected_axiskeys = "tczyx" assert export_path.exists() @@ -206,14 +206,14 @@ def test_do_not_overwrite(tmp_path, tiny_5d_vigra_array_piper): export_path = tmp_path / "test.zarr" source_op = tiny_5d_vigra_array_piper progress = mock.Mock() - 
write_ome_zarr(str(export_path), source_op.Output, progress) + write_ome_zarr(str(export_path), source_op.Output, None, progress) with pytest.raises(FileExistsError): - write_ome_zarr(str(export_path), source_op.Output, progress) + write_ome_zarr(str(export_path), source_op.Output, None, progress) source_op.Input.setValue(data_array2) with pytest.raises(FileExistsError): - write_ome_zarr(str(export_path), source_op.Output, progress) + write_ome_zarr(str(export_path), source_op.Output, None, progress) # should not overwrite existing array group = zarr.open(str(export_path)) numpy.testing.assert_array_equal(group["s0"], original_data_array) @@ -234,11 +234,12 @@ def test_match_input_scale_key_and_factors(tmp_path, tiny_5d_vigra_array_piper): ] ) source_op.Output.meta.scales = multiscales + source_op.Output.meta.active_scale = "matching_scale" # Scale metadata should be relative to raw scale, even if the export was not scaled # Exported array is 5d, so 5 scaling entries expected even though source multiscales to match are 4d expected_matching_scale_transform = [{"type": "scale", "scale": [1.0, 1.0, 3.0, 3.0, 3.0]}] - write_ome_zarr(str(export_path), source_op.Output, progress) + write_ome_zarr(str(export_path), source_op.Output, None, progress) group = zarr.open(str(export_path)) assert "multiscales" in group.attrs @@ -253,22 +254,29 @@ def test_port_ome_zarr_metadata_from_input(tmp_path, tiny_5d_vigra_array_piper): """If the source slot has scale metadata, the export should match the scale name to the input. If there is OME-Zarr specific additional metadata (even unused in ilastik), the export should write metadata that describe the pyramid as a whole, and those that - describe the written scale.""" + describe the written scale. + If there is a CoordinateOffset (i.e. 
the user has restricted a ROI in the export settings), + this should be written to the multiscale's coordinateTransformations - and correctly combined + with the input's coordinateTransformations (multiplied by scale to convert pixel offset to physical units, + and added to the input's translation).""" export_path = tmp_path / "test.zarr" source_op = tiny_5d_vigra_array_piper progress = mock.Mock() multiscales: multiscaleStore.Multiscales = OrderedDict( [ - ("raw_scale", OrderedDict([("t", 2), ("c", 2), ("z", 10), ("y", 10), ("x", 10)])), - ("matching_scale", OrderedDict([("t", 2), ("c", 2), ("z", 5), ("y", 5), ("x", 5)])), - ("downscale", OrderedDict([("t", 2), ("c", 2), ("z", 2), ("y", 2), ("x", 2)])), + ("raw_scale", OrderedDict([("t", 2), ("z", 17), ("y", 17), ("x", 17)])), + ("matching_scale", OrderedDict([("t", 2), ("z", 9), ("y", 9), ("x", 9)])), + ("downscale", OrderedDict([("t", 2), ("z", 5), ("y", 5), ("x", 5)])), ] ) + # The tiny_5d_array is 5x5x5; in this test it represents a subregion of source matching_scale after a 4/4/4 offset + export_offset = (0, 0, 4, 4, 4) source_op.Output.meta.scales = multiscales + source_op.Output.meta.active_scale = "matching_scale" expected_multiscale_transform = [{"type": "scale", "scale": [0.1, 1.0, 1.0, 1.0, 1.0]}] expected_matching_scale_transform = [ {"type": "scale", "scale": [1.0, 1.0, 2.0, 2.0, 2.0]}, - {"type": "translation", "translation": [0.1, 0.0, 3.2, 1.0, 1.0]}, + {"type": "translation", "translation": [0.1, 0.0, 11.2, 9.0, 9.0]}, # input translation + (offset*input scale) ] source_op.Output.meta.ome_zarr_meta = OMEZarrMultiscaleMeta.from_multiscale_spec( { @@ -306,7 +314,7 @@ def test_port_ome_zarr_metadata_from_input(tmp_path, tiny_5d_vigra_array_piper): } ) - write_ome_zarr(str(export_path), source_op.Output, progress) + write_ome_zarr(str(export_path), source_op.Output, export_offset, progress) group = zarr.open(str(export_path)) assert "multiscales" in group.attrs From dd604aa55889efe0fe7b3c9dc17b0225e892e6dc Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Mon, 14 Oct 2024 15:03:20 +0200 Subject: [PATCH 34/35] OME-Zarr export: Clarify and harden Transformations ambiguity --- lazyflow/utility/io_util/OMEZarrStore.py | 99 +++++++++++++++++----- lazyflow/utility/io_util/write_ome_zarr.py | 88 +++++++++++-------- 2 files changed, 132 insertions(+), 55 deletions(-) diff --git a/lazyflow/utility/io_util/OMEZarrStore.py b/lazyflow/utility/io_util/OMEZarrStore.py index d21c70baa..660819963 100644 --- a/lazyflow/utility/io_util/OMEZarrStore.py +++ b/lazyflow/utility/io_util/OMEZarrStore.py @@ -18,13 +18,13 @@ # on the ilastik web site at: # http://ilastik.org/license.html ############################################################################### -import dataclasses +from dataclasses import dataclass import json import logging import math import os from collections import OrderedDict -from typing import Dict, List, Optional, Union, Literal +from typing import Dict, List, Optional, Union, Literal, Tuple from urllib.parse import unquote_to_bytes import jsonschema @@ -42,27 +42,87 @@ OME_ZARR_V_0_4_KWARGS = dict(dimension_separator="/", normalize_keys=False) OME_ZARR_V_0_1_KWARGS = dict(dimension_separator=".") -# { -# "type": "scale" OR "translation", -# "scale": List[float] OR "translation": List[float] OR "path": str -# } -OMEZarrCoordinateTransformation = Dict[str, Union[str, List[float]]] +class InvalidTransformationError(ValueError): + pass -def _remove_transforms_with_path( - 
coordinate_transformations: Optional[List[OMEZarrCoordinateTransformation]],
-) -> Optional[List[OMEZarrCoordinateTransformation]]:
+
+
+@dataclass(frozen=True)
+class OMEZarrCoordinateTransformation:
+    """Used by OME-Zarr export to adjust export metadata according to input."""
+
+    type: Literal["scale", "translation"]
+    values: Optional[List[float]]
+
+    @classmethod
+    def from_json(cls, json_data: Dict) -> "OMEZarrCoordinateTransformation":
+        """Expected dicts look like
+        {
+            "type": Literal["scale", "translation"]
+            and EITHER "scale": List[number] OR "translation": List[number]
+        }
+        Unfortunately, the spec is internally inconsistent, so there is a chance that we may encounter
+        a coordinateTransformation with a "path" key instead of "scale" or "translation"; and possibly
+        coordinateTransformations with "type": "identity".
+        Afaik, none of the more popular converters/writers do this.
+        """
+        if (
+            json_data["type"] not in ("scale", "translation")
+            or ("scale" not in json_data and "translation" not in json_data)
+            or "path" in json_data
+        ):
+            raise InvalidTransformationError()
+        # Could raise KeyError for real nonsense like {"type": "scale", "translation": [0, 0]}
+        return cls(type=json_data["type"], values=json_data[json_data["type"]])
+
+
+TransformationsOrError = Union[
+    Tuple[OMEZarrCoordinateTransformation, Optional[OMEZarrCoordinateTransformation]], InvalidTransformationError
+]
+
+
+def _validate_transforms(
+    coordinate_transformations: Optional[List[Dict[str, Union[str, List[float]]]]],
+) -> Optional[TransformationsOrError]:
     """
-    coordinateTransformations may provide a path to transformation data in binary format, instead
-    of specifying floats for each axis. We don't know of any commonly used tool that writes such data,
-    so there is no way to know what to do with this.
+    Resolves the OME-Zarr spec's inconsistency in the coordinateTransformations field.
+    Avoids raising errors because valid metadata are not required to load and work with the data.
+    Distinguishes between None and invalid transformations so that the caller can warn on the latter.
+    Returns:
+    - None if input was None (allowed for multiscale_transformations)
+    - Tuple of scale transform and optionally translation transform if valid
+    - InvalidTransformationError if invalid (e.g. not None but also no scale transform present)
+    Inattentive writers might produce invalid transforms, depending on what part of the spec they read.
+    The Transformations spec [1] allows for "identity" transforms and arbitrary numbers of transforms,
+    but the Multiscales spec [2] only allows exactly one "scale", optionally followed by one "translation"
+    transform.
+    The "official" validator's schema [3] implements neither of these rules exactly :) It instead allows
+    for exactly one "scale" transform, plus an arbitrary number of "translation" transforms, in any order.
+    But this, plus the example at the start of the OME-Zarr spec, is a clear enough indicator that
+    "one scale + one optional translation" is the convention, and all public datasets conform to this.
+    To be graceful, we'll accept the first scale and translation. 
+ [1] https://ngff.openmicroscopy.org/latest/index.html#trafo-md + [2] https://ngff.openmicroscopy.org/latest/index.html#multiscale-md + [3] https://github.com/ome/ngff/blob/1383ce6218539baf9fe4350c46d992f2dbfe7af1/0.4/schemas/image.schema#L167 """ if coordinate_transformations is None: return None - return [transform for transform in coordinate_transformations if "path" not in transform] + if not isinstance(coordinate_transformations, list): + return InvalidTransformationError() + scale_transform = translation_transform = None + for t in coordinate_transformations: + try: + transform = OMEZarrCoordinateTransformation.from_json(t) + except (InvalidTransformationError, KeyError): + continue + if scale_transform is None and transform.type == "scale": + scale_transform = transform + if translation_transform is None and transform.type == "translation": + translation_transform = transform + return scale_transform, translation_transform if scale_transform else InvalidTransformationError() -@dataclasses.dataclass +@dataclass(frozen=True) class OMEZarrMultiscaleMeta: """ Specifically for metadata that ilastik does _not_ use internally. @@ -71,8 +131,8 @@ class OMEZarrMultiscaleMeta: axis_units: OrderedDict[Literal["t", "c", "z", "y", "x"], Optional[str]] # { axis_key: axis_unit } multiscale_name: Optional[str] - multiscale_transformations: Optional[List[OMEZarrCoordinateTransformation]] - dataset_transformations: OrderedDict[str, List[OMEZarrCoordinateTransformation]] # { scale_key: transformations } + multiscale_transformations: Optional[TransformationsOrError] + dataset_transformations: OrderedDict[str, TransformationsOrError] # { scale_key: transformations } @classmethod def from_multiscale_spec(cls, multiscale_spec) -> "OMEZarrMultiscaleMeta": @@ -81,15 +141,16 @@ def from_multiscale_spec(cls, multiscale_spec) -> "OMEZarrMultiscaleMeta": axis_units = OrderedDict([(a["name"], a.get("unit")) for a in multiscale_spec["axes"]]) else: axis_units = OrderedDict([(tag.key, None) for tag in get_axistags_from_spec(multiscale_spec)]) + invalid_transformations = [] # Ensure dataset transformations are never None (either valid or error) return cls( axis_units=axis_units, multiscale_name=multiscale_spec.get("name"), - multiscale_transformations=_remove_transforms_with_path(multiscale_spec.get("coordinateTransformations")), + multiscale_transformations=_validate_transforms(multiscale_spec.get("coordinateTransformations")), dataset_transformations=OrderedDict( [ ( scale_key_from_path(scale["path"]), - _remove_transforms_with_path(scale.get("coordinateTransformations", [])), + _validate_transforms(scale.get("coordinateTransformations", invalid_transformations)), ) for scale in multiscale_spec["datasets"] ] diff --git a/lazyflow/utility/io_util/write_ome_zarr.py b/lazyflow/utility/io_util/write_ome_zarr.py index 5050605a0..4971e1683 100644 --- a/lazyflow/utility/io_util/write_ome_zarr.py +++ b/lazyflow/utility/io_util/write_ome_zarr.py @@ -40,6 +40,7 @@ OME_ZARR_V_0_4_KWARGS, OMEZarrMultiscaleMeta, OMEZarrCoordinateTransformation, + InvalidTransformationError, ) logger = logging.getLogger(__name__) @@ -161,7 +162,7 @@ def _create_empty_zarrays( zarrays[scale_key] = zarr.creation.zeros( export_shape.values(), store=store, path=scale_key, chunks=chunk_shape, dtype=export_dtype ) - meta[scale_key] = ImageMetadata(scale_key, scaling, {}) + meta[scale_key] = ImageMetadata(scale_key, scaling, ODict()) return zarrays, meta @@ -179,29 +180,29 @@ def _scale_and_write_block(scales: ScalingsByScaleKey, zarrays: 
OrderedDict[str, def _get_input_raw_absolute_scaling(input_ome_meta: Optional[OMEZarrMultiscaleMeta]) -> Optional[OrderedScaling]: - input_scale = None - if input_ome_meta: - raw_transforms = next(iter(input_ome_meta.dataset_transformations.values())) - # Spec requires that if any, first must be scale - if len(raw_transforms) > 0: - input_scale = raw_transforms[0] - if input_scale is None or "scale" not in input_scale: + if not input_ome_meta: + return None + raw_transforms = next(iter(input_ome_meta.dataset_transformations.values())) + if isinstance(raw_transforms, InvalidTransformationError): return None - return ODict(zip(input_ome_meta.axis_units.keys(), input_scale["scale"])) + raw_scale, _ = raw_transforms + return ODict(zip(input_ome_meta.axis_units.keys(), raw_scale.values)) def _get_input_dataset_transformations( input_ome_meta: Optional[OMEZarrMultiscaleMeta], scale_key: str ) -> Tuple[Optional[OMEZarrCoordinateTransformation], Optional[OMEZarrCoordinateTransformation]]: - input_scale = None - input_translation = None + input_scale = input_translation = None if input_ome_meta and input_ome_meta.dataset_transformations.get(scale_key): input_transforms = input_ome_meta.dataset_transformations[scale_key] - # Spec requires that if any, first must be scale, second may be translation - if len(input_transforms) > 0: - input_scale = input_transforms[0] - if len(input_transforms) > 1: - input_translation = input_transforms[1] + if isinstance(input_transforms, InvalidTransformationError): + logger.warning( + "The input OME-Zarr dataset contained invalid pixel resolution or crop " + f'position metadata for scale "{scale_key}". ' + "The exported data should be fine, but please check its metadata." + ) + return None, None + input_scale, input_translation = input_transforms return input_scale, input_translation @@ -211,9 +212,9 @@ def _update_export_scaling_from_input( input_scale: Optional[OMEZarrCoordinateTransformation], scale_key: str, ) -> OrderedScaling: - if not input_scale or "scale" not in input_scale: + if input_scale is None: return absolute_scaling - input_scaling = ODict(zip(input_axiskeys, input_scale["scale"])) + input_scaling = ODict(zip(input_axiskeys, input_scale.values)) if any([input_scaling[a] != absolute_scaling[a] for a in SPATIAL_AXES if a in input_scaling]): # This shouldn't happen logger.warning( @@ -279,8 +280,8 @@ def _get_total_offset( reordered_export_offset = ODict( [(a, export_offset[a] * absolute_scaling[a] if a in export_offset else 0.0) for a in export_axiskeys] ) - if input_translation and "translation" in input_translation: - tagged_translation = ODict(zip(input_axiskeys, input_translation["translation"])) + if input_translation: + tagged_translation = ODict(zip(input_axiskeys, input_translation.values)) reordered_input_translation = ODict( [(a, tagged_translation[a] if a in tagged_translation else 0.0) for a in export_axiskeys] ) @@ -334,6 +335,34 @@ def _get_datasets_meta( return datasets +def _get_multiscale_transformations( + input_ome_meta: Optional[OMEZarrMultiscaleMeta], export_axiskeys: List[Literal["t", "c", "z", "y", "x"]] +) -> Optional[List[Dict]]: + """Extracts multiscale transformations from input OME-Zarr metadata, if available. 
+ Returns None or the transformations adjusted to export axes as OME-Zarr conforming dicts.""" + if input_ome_meta and isinstance(input_ome_meta.multiscale_transformations, tuple): + transforms_axis_matched = [] + for transform in input_ome_meta.multiscale_transformations: + if transform is None: + continue + tagged_transform = ODict(zip(input_ome_meta.axis_units.keys(), transform.values)) + default_value = 0.0 if transform.type == "translation" else 1.0 + transforms_axis_matched.append( + { + "type": transform.type, + transform.type: [ + tagged_transform[a] if a in tagged_transform else default_value for a in export_axiskeys + ], + } + ) + return transforms_axis_matched + elif input_ome_meta and input_ome_meta.multiscale_transformations is not None: + logger.warning( + "The input OME-Zarr dataset contained invalid pixel resolution or crop position metadata. " + "The exported data should be fine, but please check its metadata." + ) + + def _write_ome_zarr_and_ilastik_metadata( abs_export_path: str, export_meta: OrderedDict[str, ImageMetadata], @@ -349,22 +378,9 @@ def _write_ome_zarr_and_ilastik_metadata( datasets = _get_datasets_meta(export_meta, input_ome_meta, scalings_relative_to_raw_input, export_offset) ome_zarr_multiscale_meta = {"axes": axes, "datasets": datasets, "version": "0.4"} - # Optional fields - if input_ome_meta and input_ome_meta.multiscale_transformations: - transforms_axis_matched = [] - for transform in input_ome_meta.multiscale_transformations: - transform_type = transform["type"] - tagged_transform = ODict(zip(input_ome_meta.axis_units.keys(), transform[transform_type])) - default_value = 0.0 if transform_type == "translation" else 1.0 - transforms_axis_matched.append( - { - "type": transform_type, - transform_type: [ - tagged_transform[a] if a in tagged_transform else default_value for a in export_axiskeys - ], - } - ) - ome_zarr_multiscale_meta["coordinateTransformations"] = transforms_axis_matched + multiscale_transformations = _get_multiscale_transformations(input_ome_meta, export_axiskeys) + if multiscale_transformations: + ome_zarr_multiscale_meta["coordinateTransformations"] = multiscale_transformations store = FSStore(abs_export_path, mode="w", **OME_ZARR_V_0_4_KWARGS) root = zarr.group(store, overwrite=False) From 6c6573824be0b2af5d5a3616dd1510338bfee4d2 Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Mon, 14 Oct 2024 15:48:30 +0200 Subject: [PATCH 35/35] OME-Zarr export: Add axiskey type alias Co-authored-by: Dominik Kutra --- lazyflow/utility/io_util/write_ome_zarr.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/lazyflow/utility/io_util/write_ome_zarr.py b/lazyflow/utility/io_util/write_ome_zarr.py index 4971e1683..85bb89b0e 100644 --- a/lazyflow/utility/io_util/write_ome_zarr.py +++ b/lazyflow/utility/io_util/write_ome_zarr.py @@ -46,8 +46,9 @@ logger = logging.getLogger(__name__) Shape = Tuple[int, ...] 
-TaggedShape = OrderedDict[str, int] # { axis: size } -OrderedScaling = OrderedTranslation = OrderedDict[str, float] # { axis: scaling } +Axiskey = Literal["t", "z", "y", "x", "c"] +TaggedShape = OrderedDict[Axiskey, int] # { axis: size } +OrderedScaling = OrderedTranslation = OrderedDict[Axiskey, float] # { axis: scaling } ScalingsByScaleKey = OrderedDict[str, OrderedScaling] # { scale_key: { axis: scaling } } SPATIAL_AXES = ["z", "y", "x"] @@ -90,7 +91,7 @@ def _get_input_multiscale_matching_export( def _multiscale_shapes_to_factors( multiscales: multiscaleStore.Multiscales, base_shape: TaggedShape, - output_axiskeys: List[Literal["t", "c", "z", "y", "x"]], + output_axiskeys: List[Axiskey], ) -> List[OrderedScaling]: """Multiscales and base_shape may have arbitrary axes. Output are scaling factors relative to base_shape, with axes output_axiskeys. @@ -208,7 +209,7 @@ def _get_input_dataset_transformations( def _update_export_scaling_from_input( absolute_scaling: OrderedScaling, - input_axiskeys: Optional[List[Literal["t", "c", "z", "y", "x"]]], + input_axiskeys: Optional[List[Axiskey]], input_scale: Optional[OMEZarrCoordinateTransformation], scale_key: str, ) -> OrderedScaling: @@ -265,7 +266,7 @@ def _get_total_offset( absolute_scaling: OrderedScaling, image: ImageMetadata, export_offset: TaggedShape, - input_axiskeys: Optional[List[Literal["t", "c", "z", "y", "x"]]], + input_axiskeys: Optional[List[Axiskey]], input_translation: Optional[OMEZarrCoordinateTransformation], ) -> OrderedTranslation: # Translation may be a total of scale offset, export offset, and input offset (depending on availability) @@ -336,7 +337,7 @@ def _get_datasets_meta( def _get_multiscale_transformations( - input_ome_meta: Optional[OMEZarrMultiscaleMeta], export_axiskeys: List[Literal["t", "c", "z", "y", "x"]] + input_ome_meta: Optional[OMEZarrMultiscaleMeta], export_axiskeys: List[Axiskey] ) -> Optional[List[Dict]]: """Extracts multiscale transformations from input OME-Zarr metadata, if available. Returns None or the transformations adjusted to export axes as OME-Zarr conforming dicts."""
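
For a quick check of what the exporter writes, the store can be opened with zarr just like the tests
above do. A minimal sketch, assuming an export already exists at the hypothetical path /tmp/export.zarr:

# Sketch: inspect the multiscales metadata written by the OME-Zarr export.
# The path is hypothetical; the attribute layout matches the tests in this patch series.
import zarr

group = zarr.open("/tmp/export.zarr", mode="r")  # assumed existing export
multiscale = group.attrs["multiscales"][0]
print([a["name"] for a in multiscale["axes"]])  # exports are always written as tczyx
for dataset in multiscale["datasets"]:
    # one "scale" transform, optionally followed by one "translation" transform
    print(dataset["path"], dataset["coordinateTransformations"])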