Skip to content

Commit c349436

Browse files
authored
Speed up geometry encoding (#535)
* Speed up geometry encoding Closes #534 * cleanup * just save bool * optimize dataset creation * fix typing
1 parent acc1eb0 commit c349436

File tree

2 files changed

+37
-32
lines changed

2 files changed

+37
-32
lines changed

cf_xarray/geometry.py

+36-31
Original file line numberDiff line numberDiff line change
@@ -459,11 +459,8 @@ def shapely_to_cf(
459459
"and set the grid mapping variable as a coordinate",
460460
)
461461

462-
# Get all types to call the appropriate translation function.
463-
types = {
464-
geom.item().geom_type if isinstance(geom, xr.DataArray) else geom.geom_type
465-
for geom in geometries
466-
}
462+
as_data = geometries.data if isinstance(geometries, xr.DataArray) else geometries
463+
type_ = as_data[0].geom_type
467464

468465
grid_mapping_varname = None
469466
if (
@@ -482,16 +479,21 @@ def shapely_to_cf(
482479
suffix=suffix, grid_mapping_name=grid_mapping, grid_mapping=grid_mapping_varname
483480
)
484481

485-
if types.issubset({"Point", "MultiPoint"}):
486-
ds = points_to_cf(geometries, names=names)
487-
elif types.issubset({"LineString", "MultiLineString"}):
488-
ds = lines_to_cf(geometries, names=names)
489-
elif types.issubset({"Polygon", "MultiPolygon"}):
490-
ds = polygons_to_cf(geometries, names=names)
491-
else:
482+
try:
483+
if type_ in ["Point", "MultiPoint"]:
484+
ds = points_to_cf(geometries, names=names)
485+
elif type_ in ["LineString", "MultiLineString"]:
486+
ds = lines_to_cf(geometries, names=names)
487+
elif type_ in ["Polygon", "MultiPolygon"]:
488+
ds = polygons_to_cf(geometries, names=names)
489+
else:
490+
raise ValueError(
491+
f"This geometry type is not supported in CF-compliant datasets. Got {type_}"
492+
)
493+
except NotImplementedError as e:
492494
raise ValueError(
493-
f"Mixed geometry types are not supported in CF-compliant datasets. Got {types}"
494-
)
495+
"Error converting geometries. Possibly you have provided mixed geometry types."
496+
) from e
495497

496498
return ds
497499

@@ -841,7 +843,7 @@ def polygons_to_cf(
841843
node_count = part_node_count
842844
elif len(offsets) >= 2:
843845
indices = np.take(offsets[0], offsets[1])
844-
interior_ring = np.isin(offsets[0], indices, invert=True)[:-1].astype(int)
846+
interior_ring = np.isin(offsets[0], indices, invert=True)[:-1]
845847

846848
if len(offsets) == 3:
847849
indices = np.take(indices, offsets[2])
@@ -852,29 +854,32 @@ def polygons_to_cf(
852854
crdX = geom_coords[:, 0]
853855
crdY = geom_coords[:, 1]
854856

857+
data_vars = {names.node_count: (dim, node_count)}
858+
geometry_attrs = names.geometry_container_attrs
859+
860+
# Special case when we have no MultiPolygons and no holes
861+
if len(part_node_count) != len(node_count):
862+
data_vars[names.part_node_count] = (names.part_dim, part_node_count)
863+
geometry_attrs["part_node_count"] = names.part_node_count
864+
865+
# Special case when we have no holes
866+
if interior_ring.any():
867+
data_vars[names.interior_ring] = (names.part_dim, interior_ring)
868+
geometry_attrs["interior_ring"] = names.interior_ring
869+
870+
data_vars[names.container_name] = ( # type: ignore[assignment]
871+
(),
872+
np.nan,
873+
{"geometry_type": "polygon", **geometry_attrs},
874+
)
855875
ds = xr.Dataset(
856-
data_vars={
857-
names.node_count: xr.DataArray(node_count, dims=(dim,)),
858-
names.container_name: xr.DataArray(
859-
data=np.nan,
860-
attrs={"geometry_type": "polygon", **names.geometry_container_attrs},
861-
),
862-
},
876+
data_vars=data_vars,
863877
coords=names.coords(x=x, y=y, crdX=crdX, crdY=crdY, dim=dim),
864878
)
865879

866880
if coord is not None:
867881
ds = ds.assign_coords({dim: coord})
868882

869-
# Special case when we have no MultiPolygons and no holes
870-
if len(part_node_count) != len(node_count):
871-
ds[names.part_node_count] = xr.DataArray(part_node_count, dims=names.part_dim)
872-
ds[names.container_name].attrs["part_node_count"] = names.part_node_count
873-
874-
# Special case when we have no holes
875-
if (interior_ring != 0).any():
876-
ds[names.interior_ring] = xr.DataArray(interior_ring, dims=names.part_dim)
877-
ds[names.container_name].attrs["interior_ring"] = names.interior_ring
878883
return ds
879884

880885

cf_xarray/tests/test_geometry.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,7 @@ def test_shapely_to_cf_errors():
352352
Polygon([[1, 1, 4], [1, 3, 4], [3, 3, 3], [1, 1, 4]]),
353353
Point(1, 2),
354354
]
355-
with pytest.raises(ValueError, match="Mixed geometry types are not supported"):
355+
with pytest.raises(ValueError, match="Geometry type combination"):
356356
cfxr.shapely_to_cf(geoms)
357357

358358
encoded = cfxr.shapely_to_cf(

0 commit comments

Comments
 (0)