diff --git a/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/EqualPartitioning.java b/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/EqualPartitioning.java index 94793fa68a..2d1b95eb75 100644 --- a/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/EqualPartitioning.java +++ b/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/EqualPartitioning.java @@ -37,8 +37,19 @@ public class EqualPartitioning implements Serializable { /** The grids. */ List grids = new ArrayList(); - public EqualPartitioning(List grids) { + /** + * Whether to discard geometries that do not intersect any grid. If true, geometries that are not + * contained in a grid are placed into the overflow container. + */ + Boolean preserveUncontainedGeometries; + + public EqualPartitioning(List grids, boolean preserveUncontainedGeometries) { this.grids = grids; + this.preserveUncontainedGeometries = preserveUncontainedGeometries; + } + + public EqualPartitioning(List grids) { + this(grids, true); } /** * Instantiates a new equal partitioning. @@ -100,12 +111,12 @@ public Iterator> placeObject(Geometry geometry) { if (grid.covers(envelope)) { result.add(new Tuple2(i, geometry)); containFlag = true; - } else if (grid.intersects(envelope) || envelope.covers(grid)) { + } else if (grid.intersects(envelope)) { result.add(new Tuple2<>(i, geometry)); } } - if (!containFlag) { + if (!containFlag && preserveUncontainedGeometries) { result.add(new Tuple2<>(overflowContainerID, geometry)); } @@ -133,7 +144,7 @@ public Set getKeys(Geometry geometry) { } } - if (!containFlag) { + if (!containFlag && preserveUncontainedGeometries) { result.add(overflowContainerID); } return result; diff --git a/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/FlatGridPartitioner.java b/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/FlatGridPartitioner.java index e962a965ee..a50ce43f09 100644 --- a/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/FlatGridPartitioner.java +++ b/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/FlatGridPartitioner.java @@ -27,19 +27,39 @@ import org.locationtech.jts.geom.Geometry; import scala.Tuple2; +/** + * The FlatGridPartitioner is used when there is already a set of grids which the data should be + * partitioned into. It iterates through all the grids to find the grids to place a geometry into. + * Unless you have very few objects to place, it may make more sense to use the + * IndexedGridPartitioner. If you do not have a strict requirement to use a specific set of grids, + * it may make more sense to use another partitioner that generates its own grids from a + * space-partitioning tree, e.g. the KDBTreePartitioner or the QuadTreePartitioner. + */ public class FlatGridPartitioner extends SpatialPartitioner { - public FlatGridPartitioner(GridType gridType, List grids) { + protected final Boolean preserveUncontainedGeometries; + + public FlatGridPartitioner( + GridType gridType, List grids, Boolean preserveUncontainedGeometries) { super(gridType, grids); + this.preserveUncontainedGeometries = preserveUncontainedGeometries; + } + + public FlatGridPartitioner(GridType gridType, List grids) { + this(gridType, grids, true); + } + + public FlatGridPartitioner(List grids, Boolean preserveUncontainedGeometries) { + this(null, grids, preserveUncontainedGeometries); } // For backwards compatibility (see SpatialRDD.spatialPartitioning(otherGrids)) public FlatGridPartitioner(List grids) { - super(null, grids); + this(null, grids); } @Override public Iterator> placeObject(Geometry spatialObject) throws Exception { - EqualPartitioning partitioning = new EqualPartitioning(grids); + EqualPartitioning partitioning = new EqualPartitioning(grids, preserveUncontainedGeometries); return partitioning.placeObject(spatialObject); } @@ -61,7 +81,7 @@ public DedupParams getDedupParams() { @Override public int numPartitions() { - return grids.size() + 1 /* overflow partition */; + return grids.size() + (preserveUncontainedGeometries ? 1 : 0); } @Override diff --git a/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/IndexedGridPartitioner.java b/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/IndexedGridPartitioner.java new file mode 100644 index 0000000000..79073f2320 --- /dev/null +++ b/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/IndexedGridPartitioner.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sedona.core.spatialPartitioning; + +import java.util.*; +import org.apache.sedona.core.enums.GridType; +import org.locationtech.jts.geom.Envelope; +import org.locationtech.jts.geom.Geometry; +import org.locationtech.jts.index.strtree.STRtree; +import scala.Tuple2; + +/** + * The IndexedGridPartitioner is used when there is already a set of grids which the data should be + * partitioned into. It leverages an STRTree to quickly find the grids to place a geometry into. If + * you have very few objects to place, it may make more sense to use the FlatGridPartitioner. If you + * do not have a strict requirement to use a specific set of grids, it may make more sense to use + * another partitioner that generates its own grids from space-partitioning tree, e.g. the + * KDBTreePartitioner or the QuadTreePartitioner. + */ +public class IndexedGridPartitioner extends FlatGridPartitioner { + private final STRtree index; + + public IndexedGridPartitioner( + GridType gridType, List grids, Boolean preserveUncontainedGeometries) { + super(gridType, grids, preserveUncontainedGeometries); + this.index = new STRtree(); + for (int i = 0; i < grids.size(); i++) { + final Envelope grid = grids.get(i); + index.insert(grid, i); + } + index.build(); + } + + public IndexedGridPartitioner(GridType gridType, List grids) { + this(gridType, grids, false); + } + + public IndexedGridPartitioner(List grids, Boolean preserveUncontainedGeometries) { + this(null, grids, preserveUncontainedGeometries); + } + + public IndexedGridPartitioner(List grids) { + this(null, grids); + } + + public STRtree getIndex() { + return index; + } + + @Override + public Iterator> placeObject(Geometry spatialObject) throws Exception { + List results = index.query(spatialObject.getEnvelopeInternal()); + if (preserveUncontainedGeometries) { + // borrowed from EqualPartitioning.placeObject + final int overflowContainerID = grids.size(); + final Envelope envelope = spatialObject.getEnvelopeInternal(); + + Set> result = new HashSet(); + boolean containFlag = false; + for (Object queryResult : results) { + Integer i = (Integer) queryResult; + final Envelope grid = grids.get(i); + if (grid.covers(envelope)) { + result.add(new Tuple2(i, spatialObject)); + containFlag = true; + } else if (grid.intersects(envelope)) { + result.add(new Tuple2<>(i, spatialObject)); + } + } + + if (!containFlag) { + result.add(new Tuple2<>(overflowContainerID, spatialObject)); + } + + return result.iterator(); + } else { + return results.stream().map(i -> new Tuple2(i, spatialObject)).iterator(); + } + } +} diff --git a/spark/common/src/test/java/org/apache/sedona/core/spatialPartitioning/IndexedGridPartitionerTest.java b/spark/common/src/test/java/org/apache/sedona/core/spatialPartitioning/IndexedGridPartitionerTest.java new file mode 100644 index 0000000000..cedd94eadb --- /dev/null +++ b/spark/common/src/test/java/org/apache/sedona/core/spatialPartitioning/IndexedGridPartitionerTest.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sedona.core.spatialPartitioning; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import junit.framework.TestCase; +import org.junit.Assert; +import org.junit.Test; +import org.locationtech.jts.geom.Coordinate; +import org.locationtech.jts.geom.Envelope; +import org.locationtech.jts.geom.Geometry; +import org.locationtech.jts.geom.GeometryFactory; +import scala.Tuple2; + +public class IndexedGridPartitionerTest extends TestCase { + + private List getGrids() { + List grids = new ArrayList<>(); + grids.add(new Envelope(0, 50, 0, 50)); + grids.add(new Envelope(50, 100, 0, 50)); + grids.add(new Envelope(0, 50, 50, 100)); + grids.add(new Envelope(50, 100, 50, 100)); + return grids; + } + + private IndexedGridPartitioner getPartitioner(Boolean preserveUncontainedGeometries) { + return new IndexedGridPartitioner(getGrids(), preserveUncontainedGeometries); + } + + public void testPlaceObjectPreserveContainedGeometries() throws Exception { + IndexedGridPartitioner partitioner = getPartitioner(true); + GeometryFactory geometryFactory = new GeometryFactory(); + Geometry spatialObject = geometryFactory.createPoint(new Coordinate(25, 25)); + Iterator> result = partitioner.placeObject(spatialObject); + + List> resultList = new ArrayList<>(); + result.forEachRemaining(resultList::add); + + Assert.assertFalse(resultList.isEmpty()); + Assert.assertEquals(1, resultList.size()); + Assert.assertEquals(0, (int) resultList.get(0)._1()); + } + + public void testPlaceObjectDoesntPreserveUncontainedGeometries() throws Exception { + IndexedGridPartitioner partitioner = getPartitioner(false); + GeometryFactory geometryFactory = new GeometryFactory(); + Geometry spatialObject = geometryFactory.createPoint(new Coordinate(-25, -25)); + Iterator> result = partitioner.placeObject(spatialObject); + Assert.assertFalse(result.hasNext()); + } + + @Test + public void testGetGrids() { + IndexedGridPartitioner partitioner = getPartitioner(true); + Assert.assertEquals(getGrids(), partitioner.getGrids()); + } + + @Test + public void testNumPartitions() { + IndexedGridPartitioner partitioner = getPartitioner(true); + Assert.assertEquals(5, partitioner.numPartitions()); + + partitioner = getPartitioner(false); + Assert.assertEquals(4, partitioner.numPartitions()); + } + + @Test + public void testEquals() { + IndexedGridPartitioner partitioner = getPartitioner(true); + List grids = new ArrayList<>(); + grids.add(new Envelope(0, 50, 0, 50)); + grids.add(new Envelope(50, 100, 0, 50)); + grids.add(new Envelope(0, 50, 50, 100)); + grids.add(new Envelope(50, 100, 50, 100)); + IndexedGridPartitioner otherPartitioner = new IndexedGridPartitioner(grids, true); + Assert.assertTrue(partitioner.equals(otherPartitioner)); + } +}