Skip to content

Commit

Permalink
Merge pull request ilastik#2907 from btbest/export-ome-zarr
Browse files Browse the repository at this point in the history
OME-Zarr goes in, OME-Zarr comes out
  • Loading branch information
btbest authored Oct 15, 2024
2 parents fc2865e + 6c65738 commit dfef849
Show file tree
Hide file tree
Showing 15 changed files with 1,259 additions and 134 deletions.
19 changes: 12 additions & 7 deletions ilastik/applets/dataSelection/datasetDetailedInfoTableModel.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
# on the ilastik web site at:
# http://ilastik.org/license.html
###############################################################################
from typing import List, Dict
from typing import Dict

from PyQt5.QtCore import Qt, QAbstractItemModel, QModelIndex
from ilastik.utility import bind
Expand All @@ -37,11 +37,10 @@ class DatasetColumn:
NumColumns = 6


def _dims_to_display_string(dimensions: List[int], axiskeys: str) -> str:
"""Generate labels to put into the scale combobox.
Scale dimensions must be in xyz and will be reordered to match axiskeys."""
input_axes = dict(zip("xyz", dimensions))
reordered_dimensions = [input_axes[axis] for axis in axiskeys if axis in input_axes]
def _dims_to_display_string(dimensions: Dict[str, int], axiskeys: str) -> str:
"""Generate labels to put into the scale combobox / to display in the table.
XYZ dimensions will be reordered to match axiskeys."""
reordered_dimensions = [dimensions[axis] for axis in axiskeys if axis in "xyz"]
return ", ".join(str(size) for size in reordered_dimensions)


Expand Down Expand Up @@ -224,7 +223,13 @@ def get_scale_options(self, laneIndex) -> Dict[str, str]:
datasetInfo = datasetSlot.value
if not datasetInfo.scales:
return {}
return {key: _dims_to_display_string(dims, datasetInfo.axiskeys) for key, dims in datasetInfo.scales.items()}
# Multiscale datasets always list scales from original (largest) to most-downscaled (smallest).
# We display them in reverse order so that the default loaded scale (the smallest)
# is the first option in the drop-down box
return {
key: _dims_to_display_string(tagged_shape, datasetInfo.axiskeys)
for key, tagged_shape in reversed(datasetInfo.scales.items())
}

def is_scale_locked(self, laneIndex) -> bool:
datasetSlot = self._op.DatasetGroup[laneIndex][self._roleIndex]
Expand Down
2 changes: 1 addition & 1 deletion ilastik/applets/dataSelection/opDataSelection.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def __init__(
self.legacy_datasetId = self.generate_id()
self.working_scale = working_scale
self.scale_locked = scale_locked
self.scales = OrderedDict() # {scale_key: scale_dimensions}, see MultiscaleStore.multiscales
self.scales = OrderedDict() # {scale_key: tagged_scale_shape}, see MultiscaleStore.multiscales

@property
def shape5d(self) -> Shape5D:
Expand Down
128 changes: 54 additions & 74 deletions lazyflow/operators/ioOperators/opExportSlot.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,7 @@
from builtins import zip
from builtins import map

from builtins import object

###############################################################################
# lazyflow: data flow based lazy parallel computation framework
#
# Copyright (C) 2011-2014, the ilastik developers
# Copyright (C) 2011-2024, the ilastik developers
# <[email protected]>
#
# This program is free software; you can redistribute it and/or
Expand All @@ -25,7 +20,6 @@
# http://ilastik.org/license/
###############################################################################
import os
import shutil
import collections
import contextlib
from functools import partial
Expand All @@ -35,7 +29,7 @@

from lazyflow.graph import Operator, InputSlot, OutputSlot
from lazyflow.roi import roiFromShape
from lazyflow.utility import OrderedSignal, format_known_keys, PathComponents, mkdir_p
from lazyflow.utility import OrderedSignal, format_known_keys, PathComponents, mkdir_p, isUrl
from lazyflow.operators.ioOperators import (
OpH5N5WriterBigDataset,
OpStreamingH5N5Reader,
Expand All @@ -46,6 +40,7 @@
OpExportMultipageTiffSequence,
OpExportToArray,
)
from lazyflow.utility.io_util.write_ome_zarr import write_ome_zarr

try:
from lazyflow.operators.ioOperators import OpExportDvidVolume
Expand Down Expand Up @@ -84,6 +79,7 @@ class OpExportSlot(Operator):
_2d_exts = vigra.impex.listExtensions().split()

# List all supported formats
# Only FormatInfo.name is used (to generate help text for a cmd parameter, DataExportApplet)
_2d_formats = [FormatInfo(ext, ext, 2, 2) for ext in _2d_exts]
_3d_sequence_formats = [FormatInfo(ext + " sequence", ext, 3, 3) for ext in _2d_exts]
_3d_volume_formats = [FormatInfo("multipage tiff", "tiff", 3, 3)]
Expand All @@ -93,11 +89,11 @@ class OpExportSlot(Operator):
FormatInfo("compressed hdf5", "h5", 0, 5),
FormatInfo("n5", "n5", 0, 5),
FormatInfo("compressed n5", "n5", 0, 5),
FormatInfo("single-scale OME-Zarr", "zarr", 0, 5),
FormatInfo("numpy", "npy", 0, 5),
FormatInfo("dvid", "", 2, 5),
FormatInfo("blockwise hdf5", "json", 0, 5),
]

ALL_FORMATS = _2d_formats + _3d_sequence_formats + _3d_volume_formats + _4d_sequence_formats + nd_format_formats

def __init__(self, *args, **kwargs):
Expand All @@ -110,6 +106,7 @@ def __init__(self, *args, **kwargs):
export_impls["compressed hdf5"] = ("h5", partial(self._export_h5n5, True))
export_impls["n5"] = ("n5", self._export_h5n5)
export_impls["compressed n5"] = ("n5", partial(self._export_h5n5, True))
export_impls["single-scale OME-Zarr"] = ("zarr", self._export_ome_zarr)
export_impls["numpy"] = ("npy", self._export_npy)
export_impls["dvid"] = ("", self._export_dvid)
export_impls["blockwise hdf5"] = ("json", self._export_blockwise_hdf5)
Expand Down Expand Up @@ -151,7 +148,13 @@ def _executeExportPath(self, result):
path_format += "." + file_extension

# Provide the TOTAL path (including dataset name)
if self.OutputFormat.value in ("hdf5", "compressed hdf5", "n5", "compressed n5"):
hierarchical_formats = (
"hdf5",
"compressed hdf5",
"n5",
"compressed n5",
)
if self.OutputFormat.value in hierarchical_formats and self.OutputInternalPath.value != "":
path_format += "/" + self.OutputInternalPath.value

roi = numpy.array(roiFromShape(self.Input.meta.shape))
Expand Down Expand Up @@ -186,7 +189,15 @@ def _get_format_selection_error_msg(self, *args):
output_format = self.OutputFormat.value

# These cases support all combinations
if output_format in ("hdf5", "compressed hdf5", "n5", "compressed n5", "npy", "blockwise hdf5"):
if output_format in (
"hdf5",
"compressed hdf5",
"n5",
"compressed n5",
"npy",
"blockwise hdf5",
"single-scale OME-Zarr",
):
return ""

tagged_shape = self.Input.meta.getTaggedShape()
Expand Down Expand Up @@ -252,10 +263,10 @@ def run_export(self):
try:
export_func = self._export_impls[output_format][1]
except KeyError as e:
raise Exception(f"Unknown export format: {output_format}") from e
else:
raise NotImplementedError(f"Unknown export format: {output_format}") from e
if not isUrl(self.ExportPath.value):
mkdir_p(PathComponents(self.ExportPath.value).externalDirectory)
export_func()
export_func()

def _export_h5n5(self, compress=False):
self.progressSignal(0)
Expand Down Expand Up @@ -399,12 +410,33 @@ def _export_multipage_tiff_sequence(self):
opExport.cleanUp()
self.progressSignal(100)

def _export_ome_zarr(self):
    """Export the Input slot as a single-scale OME-Zarr store at ExportPath.

    Forwards the optional CoordinateOffset (when that slot is ready) and the
    operator's progress signal to the writer.
    """
    self.progressSignal(0)
    if self.CoordinateOffset.ready():
        offset_meta = self.CoordinateOffset.value
    else:
        offset_meta = None
    try:
        write_ome_zarr(self.ExportPath.value, self.Input, offset_meta, self.progressSignal)
    finally:
        # Always report completion so progress listeners are released,
        # even when the writer raises.
        self.progressSignal(100)


np = numpy


class FormatValidity(object):

ALL_DTYPES = (
np.uint8,
np.uint16,
np.uint32,
np.uint64,
np.int8,
np.int16,
np.int32,
np.int64,
np.float32,
np.float64,
)

# { extension : [permitted formats] }
dtypes = {
"jpg": (np.uint8,),
Expand All @@ -425,66 +457,12 @@ class FormatValidity(object):
"ppm": (np.uint8, np.uint16),
"pgm": (np.uint8, np.uint16),
"pbm": (np.uint8, np.uint16), # vigra outputs p[gn]m
"numpy": (
np.uint8,
np.uint16,
np.uint32,
np.uint64,
np.int8,
np.int16,
np.int32,
np.int64,
np.float32,
np.float64,
),
"hdf5": (
np.uint8,
np.uint16,
np.uint32,
np.uint64,
np.int8,
np.int16,
np.int32,
np.int64,
np.float32,
np.float64,
),
"compressed hdf5": (
np.uint8,
np.uint16,
np.uint32,
np.uint64,
np.int8,
np.int16,
np.int32,
np.int64,
np.float32,
np.float64,
),
"n5": (
np.uint8,
np.uint16,
np.uint32,
np.uint64,
np.int8,
np.int16,
np.int32,
np.int64,
np.float32,
np.float64,
),
"compressed n5": (
np.uint8,
np.uint16,
np.uint32,
np.uint64,
np.int8,
np.int16,
np.int32,
np.int64,
np.float32,
np.float64,
),
"numpy": ALL_DTYPES,
"hdf5": ALL_DTYPES,
"compressed hdf5": ALL_DTYPES,
"n5": ALL_DTYPES,
"compressed n5": ALL_DTYPES,
"single-scale OME-Zarr": ALL_DTYPES,
}

# { extension : (min_ndim, max_ndim) }
Expand All @@ -505,6 +483,7 @@ class FormatValidity(object):
"compressed hdf5": (0, 5),
"n5": (0, 5),
"compressed n5": (0, 5),
"single-scale OME-Zarr": (0, 5),
}

# { extension : [allowed_num_channels] }
Expand All @@ -525,6 +504,7 @@ class FormatValidity(object):
"compressed hdf5": (), # ditto
"n5": (), # ditto
"compressed n5": (), # ditto
"single-scale OME-Zarr": (),
}

@classmethod
Expand Down
3 changes: 3 additions & 0 deletions lazyflow/operators/ioOperators/opOMEZarrMultiscaleReader.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,14 @@ def setupOutputs(self):
self.Output.meta.dtype = self._store.dtype
self.Output.meta.axistags = self._store.axistags
self.Output.meta.scales = self._store.multiscales
self.Output.meta.active_scale = active_scale # Used by export to correlate export with input scale
# To feed back to DatasetInfo and hence the project file
self.Output.meta.lowest_scale = self._store.lowest_resolution_key
# Many public OME-Zarr datasets are chunked as full xy slices,
# so orthoviews lead to downloading the entire dataset.
self.Output.meta.prefer_2d = True
# Add OME-Zarr metadata to slot so that it can be ported over to an export
self.Output.meta.ome_zarr_meta = self._store.ome_meta_for_export

def execute(self, slot, subindex, roi, result):
scale = self.Scale.value if self.Scale.ready() and self.Scale.value else self._store.lowest_resolution_key
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ def setupOutputs(self):
self.Output.meta.dtype = numpy.dtype(self._volume_object.dtype).type
self.Output.meta.axistags = self._volume_object.axistags
self.Output.meta.scales = self._volume_object.multiscales
self.Output.meta.active_scale = active_scale # Used by export to correlate export with input scale
# To feed back to DatasetInfo and hence the project file
self.Output.meta.lowest_scale = self._volume_object.lowest_resolution_key

Expand Down
40 changes: 33 additions & 7 deletions lazyflow/operators/ioOperators/opStreamingH5N5Reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import contextlib
import logging
import time
from collections import OrderedDict
from typing import Union

import vigra
Expand All @@ -33,7 +34,12 @@
from lazyflow.graph import Operator, InputSlot, OutputSlot
from lazyflow.utility import Timer
from lazyflow.utility.helpers import get_default_axisordering, bigintprod
from lazyflow.utility.io_util.OMEZarrStore import get_axistags_from_spec as get_ome_zarr_axistags
from lazyflow.utility.io_util.OMEZarrStore import (
get_axistags_from_spec as get_ome_zarr_axistags,
OMEZarrMultiscaleMeta,
scale_key_from_path,
)
from lazyflow.utility.io_util.multiscaleStore import Multiscales

logger = logging.getLogger(__name__)

Expand All @@ -52,12 +58,7 @@ def _find_or_infer_axistags(file: Union[h5py.File, z5py.N5File, z5py.ZarrFile],
try:
# Look for OME-Zarr metadata (found at store root, not in dataset)
# OME-Zarr stores with more than one multiscale don't exist in public, but the spec allows it
multiscale_index = None
for i, scale in enumerate(file.attrs["multiscales"]):
if any(d.get("path", "") == internalPath.lstrip("/") for d in scale.get("datasets", [])):
multiscale_index = i
if multiscale_index is None:
raise KeyError("no spec for dataset path")
multiscale_index = _multiscale_index_for_path(file.attrs["multiscales"], internalPath)
return get_ome_zarr_axistags(file.attrs["multiscales"][multiscale_index])
except KeyError as e:
msg = (
Expand All @@ -79,6 +80,16 @@ def _find_or_infer_axistags(file: Union[h5py.File, z5py.N5File, z5py.ZarrFile],
return vigra.defaultAxistags(str(axisorder))


def _multiscale_index_for_path(multiscales_spec, internalPath: str):
multiscale_index = None
for i, scale in enumerate(multiscales_spec):
if any(d.get("path", "") == internalPath.lstrip("/") for d in scale.get("datasets", [])):
multiscale_index = i
if multiscale_index is None:
raise KeyError("no spec for dataset path")
return multiscale_index


class OpStreamingH5N5Reader(Operator):
"""
The top-level operator for the data selection applet.
Expand Down Expand Up @@ -146,6 +157,21 @@ def setupOutputs(self):
if chunks:
self.OutputImage.meta.ideal_blockshape = chunks

if isinstance(self._h5N5File, z5py.ZarrFile):
# Add OME-Zarr metadata to slot so that it can be ported over to an export
multiscales_meta = self._h5N5File.attrs["multiscales"]
multiscale_spec = multiscales_meta[_multiscale_index_for_path(multiscales_meta, internalPath)]
scale_keys = [scale_key_from_path(dataset["path"]) for dataset in multiscale_spec["datasets"]]
scale_tagged_shapes = [
OrderedDict(zip(axistags.keys(), self._h5N5File[dataset["path"]].shape))
for dataset in multiscale_spec["datasets"]
]
scales: Multiscales = OrderedDict(zip(scale_keys, scale_tagged_shapes))
self.OutputImage.meta.scales = scales
self.OutputImage.meta.active_scale = scale_key_from_path(internalPath)
self.OutputImage.meta.lowest_scale = scale_keys[-1]
self.OutputImage.meta.ome_zarr_meta = OMEZarrMultiscaleMeta.from_multiscale_spec(multiscale_spec)

def execute(self, slot, subindex, roi, result):
t = time.time()
assert self._h5N5File is not None
Expand Down
Loading

0 comments on commit dfef849

Please sign in to comment.