Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use cube chunks for weights in aggregations with smart weights #6288

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion docs/src/whatsnew/latest.rst
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,11 @@ This document explains the changes made to Iris for this release
:doc:`/developers_guide/release_do_nothing` to be more thorough and apply
lessons learned from recent releases. (:pull:`6062`)

#. `@schlunma`_ made lazy `smart weights
<https://github.com/SciTools/iris/pull/5084>`__ used for cube
aggregations have the same chunks as their parent cube if broadcasting is
necessary. (:issue:`6285`, :pull:`6288`)


.. comment
Whatsnew author names (@github name) in alphabetical order. Note that,
Expand All @@ -102,4 +107,4 @@ This document explains the changes made to Iris for this release


.. comment
Whatsnew resources in alphabetical order:
Whatsnew resources in alphabetical order:
5 changes: 5 additions & 0 deletions lib/iris/analysis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1198,10 +1198,15 @@ def __init__(self, weights, cube):
dim_metadata = cube._dimensional_metadata(weights)
derived_array = dim_metadata._core_values()
if dim_metadata.shape != cube.shape:
if isinstance(derived_array, da.Array):
chunks = cube.lazy_data().chunks
else:
chunks = None
derived_array = iris.util.broadcast_to_shape(
derived_array,
cube.shape,
dim_metadata.cube_dims(cube),
chunks=chunks,
)
derived_units = dim_metadata.units

Expand Down
5 changes: 2 additions & 3 deletions lib/iris/analysis/cartography.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,9 +405,8 @@ def area_weights(cube, normalize=False, compute=True, chunks=None):
If False, return a lazy dask array. If True, return a numpy array.
chunks : tuple, optional
If compute is False and a value is provided, then the result will use
these chunks instead of the same chunks as the cube data. The values
provided here will only be used along dimensions that are not latitude
or longitude.
these chunks. The values provided here will only be used along
dimensions that are not latitude or longitude.

Returns
-------
Expand Down
47 changes: 32 additions & 15 deletions lib/iris/tests/test_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -1192,8 +1192,6 @@ def test_rotate_1d(self):

@_shared_utils.skip_data
class TestAreaWeights:
# Note: chunks is simply ignored for non-lazy data
@pytest.mark.parametrize("chunks", [None, (2, 3)])
@pytest.fixture(autouse=True)
def _setup(self, request):
self.request = request
Expand Down Expand Up @@ -1726,15 +1724,24 @@ def test_weights_in_kwargs(self):
assert kwargs == {"test_kwarg": "test", "weights": "ignored"}


@pytest.mark.parametrize("lazy", [True, False])
class TestWeights:
@pytest.fixture(autouse=True)
def _setup_test_data(self):
self.array_lib = np
self.target_type = np.ndarray
def _setup_test_data(self, lazy):
if lazy:
self.array_lib = da
self.target_type = da.Array
self.chunks = ((2,), (1, 1, 1))
else:
self.array_lib = np
self.target_type = np.ndarray
self.chunks = None
self.create_test_data()

def create_test_data(self):
self.data = self.array_lib.arange(6).reshape(2, 3)
if self.chunks is not None:
self.data = self.data.rechunk(self.chunks)
self.lat = iris.coords.DimCoord(
self.array_lib.array([0, 1]),
standard_name="latitude",
Expand Down Expand Up @@ -1770,13 +1777,17 @@ def test_init_with_array(self):
assert isinstance(weights.units, cf_units.Unit)
assert weights.array is self.data
assert weights.units == "1"
if self.chunks is not None:
assert weights.array.chunks == self.chunks

def test_init_with_cube(self):
    """Build ``_Weights`` from the cube itself and check array, units and chunks."""
    result = _Weights(self.cube, self.cube)

    assert isinstance(result.array, self.target_type)
    assert isinstance(result.units, cf_units.Unit)
    # Identity, not just equality, is asserted for the weights array.
    assert result.array is self.data
    assert result.units == "K"

    # ``self.chunks`` is only set to a real value in the lazy case.
    expected_chunks = self.chunks
    if expected_chunks is not None:
        assert result.array.chunks == expected_chunks

def test_init_with_str_dim_coord(self):
weights = _Weights("latitude", self.cube)
Expand All @@ -1792,20 +1803,28 @@ def test_init_with_str_aux_coord(self):
assert isinstance(weights.units, cf_units.Unit)
_shared_utils.assert_array_equal(weights.array, [[3, 3, 3], [4, 4, 4]])
assert weights.units == "s"
if self.chunks is not None:
assert weights.array.chunks == self.chunks

def test_init_with_str_ancillary_variable(self):
    """Build ``_Weights`` from the string "ancvar" and check array, units, chunks."""
    ancvar_weights = _Weights("ancvar", self.cube)

    assert isinstance(ancvar_weights.array, self.target_type)
    assert isinstance(ancvar_weights.units, cf_units.Unit)
    _shared_utils.assert_array_equal(
        ancvar_weights.array,
        [[5, 6, 7], [5, 6, 7]],
    )
    assert ancvar_weights.units == "kg"

    # Chunks of existing array dimensions passed to broadcast_to_shape are
    # ignored
    is_lazy_case = self.chunks is not None
    if is_lazy_case:
        assert ancvar_weights.array.chunks == ((2,), (3,))

def test_init_with_str_cell_measure(self):
    """Build ``_Weights`` from the string "cell_area" and check array, units, chunks."""
    area_weights = _Weights("cell_area", self.cube)

    assert isinstance(area_weights.array, self.target_type)
    assert isinstance(area_weights.units, cf_units.Unit)
    _shared_utils.assert_array_equal(area_weights.array, self.data)
    assert area_weights.units == "m2"

    # Chunk layout is only compared when the fixture defined one.
    wanted_chunks = self.chunks
    if wanted_chunks is not None:
        assert area_weights.array.chunks == wanted_chunks

def test_init_with_dim_coord(self):
weights = _Weights(self.lat, self.cube)
Expand All @@ -1821,20 +1840,28 @@ def test_init_with_aux_coord(self):
assert isinstance(weights.units, cf_units.Unit)
_shared_utils.assert_array_equal(weights.array, [[3, 3, 3], [4, 4, 4]])
assert weights.units == "s"
if self.chunks is not None:
assert weights.array.chunks == self.chunks

def test_init_with_ancillary_variable(self):
    """Build ``_Weights`` from the ancillary variable object and check the result."""
    result = _Weights(self.ancillary_variable, self.cube)

    assert isinstance(result.array, self.target_type)
    assert isinstance(result.units, cf_units.Unit)
    expected_values = [[5, 6, 7], [5, 6, 7]]
    _shared_utils.assert_array_equal(result.array, expected_values)
    assert result.units == "kg"

    # Chunks of existing array dimensions passed to broadcast_to_shape are
    # ignored
    if self.chunks is not None:
        expected_chunks = ((2,), (3,))
        assert result.array.chunks == expected_chunks

def test_init_with_cell_measure(self):
    """Build ``_Weights`` from the cell measure object and check array, units, chunks."""
    measure_weights = _Weights(self.cell_measure, self.cube)

    assert isinstance(measure_weights.array, self.target_type)
    assert isinstance(measure_weights.units, cf_units.Unit)
    _shared_utils.assert_array_equal(measure_weights.array, self.data)
    assert measure_weights.units == "m2"

    # Chunk layout is only compared when the fixture defined one.
    reference_chunks = self.chunks
    if reference_chunks is not None:
        assert measure_weights.array.chunks == reference_chunks

def test_init_with_list(self):
list_in = [0, 1, 2]
Expand All @@ -1845,16 +1872,6 @@ def test_init_with_list(self):
assert weights.units == "1"


class TestWeightsLazy(TestWeights):
    """Run the inherited ``TestWeights`` tests again using lazy arrays."""

    @pytest.fixture(autouse=True)
    def _setup_test_data(self):
        # Select the lazy array library and its array type, then rebuild the
        # shared test data with them.
        self.target_type = da.core.Array
        self.array_lib = da
        self.create_test_data()


def test__Groupby_repr():
groupby_coord = iris.coords.AuxCoord([2000, 2000], var_name="year")
shared_coord = iris.coords.DimCoord(
Expand Down
Loading