Skip to content

Commit

Permalink
Merge pull request ilastik#2907 from btbest/export-ome-zarr
Browse files Browse the repository at this point in the history
OME-Zarr goes in, OME-Zarr comes out
  • Loading branch information
btbest authored Oct 15, 2024
2 parents fc2865e + 6c65738 commit dfef849
Show file tree
Hide file tree
Showing 15 changed files with 1,259 additions and 134 deletions.
19 changes: 12 additions & 7 deletions ilastik/applets/dataSelection/datasetDetailedInfoTableModel.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
# on the ilastik web site at:
# http://ilastik.org/license.html
###############################################################################
from typing import List, Dict
from typing import Dict

from PyQt5.QtCore import Qt, QAbstractItemModel, QModelIndex
from ilastik.utility import bind
Expand All @@ -37,11 +37,10 @@ class DatasetColumn:
NumColumns = 6


def _dims_to_display_string(dimensions: List[int], axiskeys: str) -> str:
"""Generate labels to put into the scale combobox.
Scale dimensions must be in xyz and will be reordered to match axiskeys."""
input_axes = dict(zip("xyz", dimensions))
reordered_dimensions = [input_axes[axis] for axis in axiskeys if axis in input_axes]
def _dims_to_display_string(dimensions: Dict[str, int], axiskeys: str) -> str:
"""Generate labels to put into the scale combobox / to display in the table.
XYZ dimensions will be reordered to match axiskeys."""
reordered_dimensions = [dimensions[axis] for axis in axiskeys if axis in "xyz"]
return ", ".join(str(size) for size in reordered_dimensions)


Expand Down Expand Up @@ -224,7 +223,13 @@ def get_scale_options(self, laneIndex) -> Dict[str, str]:
datasetInfo = datasetSlot.value
if not datasetInfo.scales:
return {}
return {key: _dims_to_display_string(dims, datasetInfo.axiskeys) for key, dims in datasetInfo.scales.items()}
# Multiscale datasets always list scales from original (largest) to most-downscaled (smallest).
# We display them in reverse order so that the default loaded scale (the smallest)
# is the first option in the drop-down box
return {
key: _dims_to_display_string(tagged_shape, datasetInfo.axiskeys)
for key, tagged_shape in reversed(datasetInfo.scales.items())
}

def is_scale_locked(self, laneIndex) -> bool:
datasetSlot = self._op.DatasetGroup[laneIndex][self._roleIndex]
Expand Down
2 changes: 1 addition & 1 deletion ilastik/applets/dataSelection/opDataSelection.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def __init__(
self.legacy_datasetId = self.generate_id()
self.working_scale = working_scale
self.scale_locked = scale_locked
self.scales = OrderedDict() # {scale_key: scale_dimensions}, see MultiscaleStore.multiscales
self.scales = OrderedDict() # {scale_key: tagged_scale_shape}, see MultiscaleStore.multiscales

@property
def shape5d(self) -> Shape5D:
Expand Down
128 changes: 54 additions & 74 deletions lazyflow/operators/ioOperators/opExportSlot.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,7 @@
from builtins import zip
from builtins import map

from builtins import object

###############################################################################
# lazyflow: data flow based lazy parallel computation framework
#
# Copyright (C) 2011-2014, the ilastik developers
# Copyright (C) 2011-2024, the ilastik developers
# <[email protected]>
#
# This program is free software; you can redistribute it and/or
Expand All @@ -25,7 +20,6 @@
# http://ilastik.org/license/
###############################################################################
import os
import shutil
import collections
import contextlib
from functools import partial
Expand All @@ -35,7 +29,7 @@

from lazyflow.graph import Operator, InputSlot, OutputSlot
from lazyflow.roi import roiFromShape
from lazyflow.utility import OrderedSignal, format_known_keys, PathComponents, mkdir_p
from lazyflow.utility import OrderedSignal, format_known_keys, PathComponents, mkdir_p, isUrl
from lazyflow.operators.ioOperators import (
OpH5N5WriterBigDataset,
OpStreamingH5N5Reader,
Expand All @@ -46,6 +40,7 @@
OpExportMultipageTiffSequence,
OpExportToArray,
)
from lazyflow.utility.io_util.write_ome_zarr import write_ome_zarr

try:
from lazyflow.operators.ioOperators import OpExportDvidVolume
Expand Down Expand Up @@ -84,6 +79,7 @@ class OpExportSlot(Operator):
_2d_exts = vigra.impex.listExtensions().split()

# List all supported formats
# Only FormatInfo.name is used (to generate help text for a cmd parameter, DataExportApplet)
_2d_formats = [FormatInfo(ext, ext, 2, 2) for ext in _2d_exts]
_3d_sequence_formats = [FormatInfo(ext + " sequence", ext, 3, 3) for ext in _2d_exts]
_3d_volume_formats = [FormatInfo("multipage tiff", "tiff", 3, 3)]
Expand All @@ -93,11 +89,11 @@ class OpExportSlot(Operator):
FormatInfo("compressed hdf5", "h5", 0, 5),
FormatInfo("n5", "n5", 0, 5),
FormatInfo("compressed n5", "n5", 0, 5),
FormatInfo("single-scale OME-Zarr", "zarr", 0, 5),
FormatInfo("numpy", "npy", 0, 5),
FormatInfo("dvid", "", 2, 5),
FormatInfo("blockwise hdf5", "json", 0, 5),
]

ALL_FORMATS = _2d_formats + _3d_sequence_formats + _3d_volume_formats + _4d_sequence_formats + nd_format_formats

def __init__(self, *args, **kwargs):
Expand All @@ -110,6 +106,7 @@ def __init__(self, *args, **kwargs):
export_impls["compressed hdf5"] = ("h5", partial(self._export_h5n5, True))
export_impls["n5"] = ("n5", self._export_h5n5)
export_impls["compressed n5"] = ("n5", partial(self._export_h5n5, True))
export_impls["single-scale OME-Zarr"] = ("zarr", self._export_ome_zarr)
export_impls["numpy"] = ("npy", self._export_npy)
export_impls["dvid"] = ("", self._export_dvid)
export_impls["blockwise hdf5"] = ("json", self._export_blockwise_hdf5)
Expand Down Expand Up @@ -151,7 +148,13 @@ def _executeExportPath(self, result):
path_format += "." + file_extension

# Provide the TOTAL path (including dataset name)
if self.OutputFormat.value in ("hdf5", "compressed hdf5", "n5", "compressed n5"):
hierarchical_formats = (
"hdf5",
"compressed hdf5",
"n5",
"compressed n5",
)
if self.OutputFormat.value in hierarchical_formats and self.OutputInternalPath.value != "":
path_format += "/" + self.OutputInternalPath.value

roi = numpy.array(roiFromShape(self.Input.meta.shape))
Expand Down Expand Up @@ -186,7 +189,15 @@ def _get_format_selection_error_msg(self, *args):
output_format = self.OutputFormat.value

# These cases support all combinations
if output_format in ("hdf5", "compressed hdf5", "n5", "compressed n5", "npy", "blockwise hdf5"):
if output_format in (
"hdf5",
"compressed hdf5",
"n5",
"compressed n5",
"npy",
"blockwise hdf5",
"single-scale OME-Zarr",
):
return ""

tagged_shape = self.Input.meta.getTaggedShape()
Expand Down Expand Up @@ -252,10 +263,10 @@ def run_export(self):
try:
export_func = self._export_impls[output_format][1]
except KeyError as e:
raise Exception(f"Unknown export format: {output_format}") from e
else:
raise NotImplementedError(f"Unknown export format: {output_format}") from e
if not isUrl(self.ExportPath.value):
mkdir_p(PathComponents(self.ExportPath.value).externalDirectory)
export_func()
export_func()

def _export_h5n5(self, compress=False):
self.progressSignal(0)
Expand Down Expand Up @@ -399,12 +410,33 @@ def _export_multipage_tiff_sequence(self):
opExport.cleanUp()
self.progressSignal(100)

def _export_ome_zarr(self):
    """Export the Input slot as a single-scale OME-Zarr store at ExportPath.

    Forwards the optional CoordinateOffset (when that slot is ready) and the
    operator's progress signal to the writer.
    """
    self.progressSignal(0)
    if self.CoordinateOffset.ready():
        offset_meta = self.CoordinateOffset.value
    else:
        offset_meta = None
    try:
        write_ome_zarr(self.ExportPath.value, self.Input, offset_meta, self.progressSignal)
    finally:
        # Always report completion so progress listeners are released,
        # even when the writer raises.
        self.progressSignal(100)


np = numpy


class FormatValidity(object):

ALL_DTYPES = (
np.uint8,
np.uint16,
np.uint32,
np.uint64,
np.int8,
np.int16,
np.int32,
np.int64,
np.float32,
np.float64,
)

# { extension : [permitted formats] }
dtypes = {
"jpg": (np.uint8,),
Expand All @@ -425,66 +457,12 @@ class FormatValidity(object):
"ppm": (np.uint8, np.uint16),
"pgm": (np.uint8, np.uint16),
"pbm": (np.uint8, np.uint16), # vigra outputs p[gn]m
"numpy": (
np.uint8,
np.uint16,
np.uint32,
np.uint64,
np.int8,
np.int16,
np.int32,
np.int64,
np.float32,
np.float64,
),
"hdf5": (
np.uint8,
np.uint16,
np.uint32,
np.uint64,
np.int8,
np.int16,
np.int32,
np.int64,
np.float32,
np.float64,
),
"compressed hdf5": (
np.uint8,
np.uint16,
np.uint32,
np.uint64,
np.int8,
np.int16,
np.int32,
np.int64,
np.float32,
np.float64,
),
"n5": (
np.uint8,
np.uint16,
np.uint32,
np.uint64,
np.int8,
np.int16,
np.int32,
np.int64,
np.float32,
np.float64,
),
"compressed n5": (
np.uint8,
np.uint16,
np.uint32,
np.uint64,
np.int8,
np.int16,
np.int32,
np.int64,
np.float32,
np.float64,
),
"numpy": ALL_DTYPES,
"hdf5": ALL_DTYPES,
"compressed hdf5": ALL_DTYPES,
"n5": ALL_DTYPES,
"compressed n5": ALL_DTYPES,
"single-scale OME-Zarr": ALL_DTYPES,
}

# { extension : (min_ndim, max_ndim) }
Expand All @@ -505,6 +483,7 @@ class FormatValidity(object):
"compressed hdf5": (0, 5),
"n5": (0, 5),
"compressed n5": (0, 5),
"single-scale OME-Zarr": (0, 5),
}

# { extension : [allowed_num_channels] }
Expand All @@ -525,6 +504,7 @@ class FormatValidity(object):
"compressed hdf5": (), # ditto
"n5": (), # ditto
"compressed n5": (), # ditto
"single-scale OME-Zarr": (),
}

@classmethod
Expand Down
3 changes: 3 additions & 0 deletions lazyflow/operators/ioOperators/opOMEZarrMultiscaleReader.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,14 @@ def setupOutputs(self):
self.Output.meta.dtype = self._store.dtype
self.Output.meta.axistags = self._store.axistags
self.Output.meta.scales = self._store.multiscales
self.Output.meta.active_scale = active_scale # Used by export to correlate export with input scale
# To feed back to DatasetInfo and hence the project file
self.Output.meta.lowest_scale = self._store.lowest_resolution_key
# Many public OME-Zarr datasets are chunked as full xy slices,
# so orthoviews lead to downloading the entire dataset.
self.Output.meta.prefer_2d = True
# Add OME-Zarr metadata to slot so that it can be ported over to an export
self.Output.meta.ome_zarr_meta = self._store.ome_meta_for_export

def execute(self, slot, subindex, roi, result):
scale = self.Scale.value if self.Scale.ready() and self.Scale.value else self._store.lowest_resolution_key
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ def setupOutputs(self):
self.Output.meta.dtype = numpy.dtype(self._volume_object.dtype).type
self.Output.meta.axistags = self._volume_object.axistags
self.Output.meta.scales = self._volume_object.multiscales
self.Output.meta.active_scale = active_scale # Used by export to correlate export with input scale
# To feed back to DatasetInfo and hence the project file
self.Output.meta.lowest_scale = self._volume_object.lowest_resolution_key

Expand Down
40 changes: 33 additions & 7 deletions lazyflow/operators/ioOperators/opStreamingH5N5Reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import contextlib
import logging
import time
from collections import OrderedDict
from typing import Union

import vigra
Expand All @@ -33,7 +34,12 @@
from lazyflow.graph import Operator, InputSlot, OutputSlot
from lazyflow.utility import Timer
from lazyflow.utility.helpers import get_default_axisordering, bigintprod
from lazyflow.utility.io_util.OMEZarrStore import get_axistags_from_spec as get_ome_zarr_axistags
from lazyflow.utility.io_util.OMEZarrStore import (
get_axistags_from_spec as get_ome_zarr_axistags,
OMEZarrMultiscaleMeta,
scale_key_from_path,
)
from lazyflow.utility.io_util.multiscaleStore import Multiscales

logger = logging.getLogger(__name__)

Expand All @@ -52,12 +58,7 @@ def _find_or_infer_axistags(file: Union[h5py.File, z5py.N5File, z5py.ZarrFile],
try:
# Look for OME-Zarr metadata (found at store root, not in dataset)
# OME-Zarr stores with more than one multiscale don't exist in public, but the spec allows it
multiscale_index = None
for i, scale in enumerate(file.attrs["multiscales"]):
if any(d.get("path", "") == internalPath.lstrip("/") for d in scale.get("datasets", [])):
multiscale_index = i
if multiscale_index is None:
raise KeyError("no spec for dataset path")
multiscale_index = _multiscale_index_for_path(file.attrs["multiscales"], internalPath)
return get_ome_zarr_axistags(file.attrs["multiscales"][multiscale_index])
except KeyError as e:
msg = (
Expand All @@ -79,6 +80,16 @@ def _find_or_infer_axistags(file: Union[h5py.File, z5py.N5File, z5py.ZarrFile],
return vigra.defaultAxistags(str(axisorder))


def _multiscale_index_for_path(multiscales_spec, internalPath: str):
multiscale_index = None
for i, scale in enumerate(multiscales_spec):
if any(d.get("path", "") == internalPath.lstrip("/") for d in scale.get("datasets", [])):
multiscale_index = i
if multiscale_index is None:
raise KeyError("no spec for dataset path")
return multiscale_index


class OpStreamingH5N5Reader(Operator):
"""
The top-level operator for the data selection applet.
Expand Down Expand Up @@ -146,6 +157,21 @@ def setupOutputs(self):
if chunks:
self.OutputImage.meta.ideal_blockshape = chunks

if isinstance(self._h5N5File, z5py.ZarrFile):
# Add OME-Zarr metadata to slot so that it can be ported over to an export
multiscales_meta = self._h5N5File.attrs["multiscales"]
multiscale_spec = multiscales_meta[_multiscale_index_for_path(multiscales_meta, internalPath)]
scale_keys = [scale_key_from_path(dataset["path"]) for dataset in multiscale_spec["datasets"]]
scale_tagged_shapes = [
OrderedDict(zip(axistags.keys(), self._h5N5File[dataset["path"]].shape))
for dataset in multiscale_spec["datasets"]
]
scales: Multiscales = OrderedDict(zip(scale_keys, scale_tagged_shapes))
self.OutputImage.meta.scales = scales
self.OutputImage.meta.active_scale = scale_key_from_path(internalPath)
self.OutputImage.meta.lowest_scale = scale_keys[-1]
self.OutputImage.meta.ome_zarr_meta = OMEZarrMultiscaleMeta.from_multiscale_spec(multiscale_spec)

def execute(self, slot, subindex, roi, result):
t = time.time()
assert self._h5N5File is not None
Expand Down
Loading

0 comments on commit dfef849

Please sign in to comment.