Skip to content

Commit

Permalink
[SEDONA-702] Create IndexedGridPartitioner, Support preserveUncontain… (
Browse files Browse the repository at this point in the history
#1769)

* [SEDONA-702] Create IndexedGridPartitioner, Support preserveUncontainedGeometries in *GridPartitioners

* add docstrings to the GridPartitioner classes

* remove extra covers call

---------

Co-authored-by: jameswillis <[email protected]>
  • Loading branch information
james-willis and jameswillis authored Jan 25, 2025
1 parent 211e3c7 commit 38b0fc9
Show file tree
Hide file tree
Showing 4 changed files with 231 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,19 @@ public class EqualPartitioning implements Serializable {
/** The grids. */
List<Envelope> grids = new ArrayList<Envelope>();

public EqualPartitioning(List<Envelope> grids) {
/**
* Whether to preserve geometries that are not fully covered by any single grid. If true, such
* geometries are additionally placed into the overflow container; if false, they are not.
*/
Boolean preserveUncontainedGeometries;

public EqualPartitioning(List<Envelope> grids, boolean preserveUncontainedGeometries) {
this.grids = grids;
this.preserveUncontainedGeometries = preserveUncontainedGeometries;
}

public EqualPartitioning(List<Envelope> grids) {
this(grids, true);
}
/**
* Instantiates a new equal partitioning.
Expand Down Expand Up @@ -100,12 +111,12 @@ public Iterator<Tuple2<Integer, Geometry>> placeObject(Geometry geometry) {
if (grid.covers(envelope)) {
result.add(new Tuple2(i, geometry));
containFlag = true;
} else if (grid.intersects(envelope) || envelope.covers(grid)) {
} else if (grid.intersects(envelope)) {
result.add(new Tuple2<>(i, geometry));
}
}

if (!containFlag) {
if (!containFlag && preserveUncontainedGeometries) {
result.add(new Tuple2<>(overflowContainerID, geometry));
}

Expand Down Expand Up @@ -133,7 +144,7 @@ public Set<Integer> getKeys(Geometry geometry) {
}
}

if (!containFlag) {
if (!containFlag && preserveUncontainedGeometries) {
result.add(overflowContainerID);
}
return result;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,39 @@
import org.locationtech.jts.geom.Geometry;
import scala.Tuple2;

/**
* The FlatGridPartitioner is used when there is already a set of grids which the data should be
* partitioned into. It iterates through all the grids to find the grids to place a geometry into.
* Unless you have very few objects to place, it may make more sense to use the
* IndexedGridPartitioner. If you do not have a strict requirement to use a specific set of grids,
* it may make more sense to use another partitioner that generates its own grids from a
* space-partitioning tree, e.g. the KDBTreePartitioner or the QuadTreePartitioner.
*/
public class FlatGridPartitioner extends SpatialPartitioner {
public FlatGridPartitioner(GridType gridType, List<Envelope> grids) {
protected final Boolean preserveUncontainedGeometries;

public FlatGridPartitioner(
GridType gridType, List<Envelope> grids, Boolean preserveUncontainedGeometries) {
super(gridType, grids);
this.preserveUncontainedGeometries = preserveUncontainedGeometries;
}

public FlatGridPartitioner(GridType gridType, List<Envelope> grids) {
this(gridType, grids, true);
}

public FlatGridPartitioner(List<Envelope> grids, Boolean preserveUncontainedGeometries) {
this(null, grids, preserveUncontainedGeometries);
}

// For backwards compatibility (see SpatialRDD.spatialPartitioning(otherGrids))
public FlatGridPartitioner(List<Envelope> grids) {
super(null, grids);
this(null, grids);
}

@Override
public Iterator<Tuple2<Integer, Geometry>> placeObject(Geometry spatialObject) throws Exception {
EqualPartitioning partitioning = new EqualPartitioning(grids);
EqualPartitioning partitioning = new EqualPartitioning(grids, preserveUncontainedGeometries);
return partitioning.placeObject(spatialObject);
}

Expand All @@ -61,7 +81,7 @@ public DedupParams getDedupParams() {

@Override
public int numPartitions() {
return grids.size() + 1 /* overflow partition */;
return grids.size() + (preserveUncontainedGeometries ? 1 : 0);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sedona.core.spatialPartitioning;

import java.util.*;
import org.apache.sedona.core.enums.GridType;
import org.locationtech.jts.geom.Envelope;
import org.locationtech.jts.geom.Geometry;
import org.locationtech.jts.index.strtree.STRtree;
import scala.Tuple2;

/**
* The IndexedGridPartitioner is used when there is already a set of grids which the data should be
* partitioned into. It leverages an STRtree to quickly find the grids to place a geometry into. If
* you have very few objects to place, it may make more sense to use the FlatGridPartitioner. If you
* do not have a strict requirement to use a specific set of grids, it may make more sense to use
* another partitioner that generates its own grids from a space-partitioning tree, e.g. the
* KDBTreePartitioner or the QuadTreePartitioner.
*/
public class IndexedGridPartitioner extends FlatGridPartitioner {
  /** Spatial index over the grid envelopes; each item is the grid's position in the grid list. */
  private final STRtree index;

  /**
   * Creates a partitioner over the given grids and indexes them in an STRtree.
   *
   * @param gridType the grid type handed to the parent partitioner; may be null
   * @param grids the grid envelopes; a grid's position in this list is its partition id
   * @param preserveUncontainedGeometries if true, a geometry whose envelope is not covered by any
   *     single grid is additionally placed into the overflow partition (id {@code grids.size()});
   *     must not be null because it is unboxed when objects are placed
   */
  public IndexedGridPartitioner(
      GridType gridType, List<Envelope> grids, Boolean preserveUncontainedGeometries) {
    super(gridType, grids, preserveUncontainedGeometries);
    this.index = new STRtree();
    for (int gridId = 0; gridId < grids.size(); gridId++) {
      index.insert(grids.get(gridId), gridId);
    }
    // Build the tree up front instead of lazily on the first query.
    index.build();
  }

  /**
   * Creates a partitioner that preserves uncontained geometries, matching the default of the
   * equivalent {@link FlatGridPartitioner} constructor.
   *
   * @param gridType the grid type handed to the parent partitioner; may be null
   * @param grids the grid envelopes
   */
  public IndexedGridPartitioner(GridType gridType, List<Envelope> grids) {
    this(gridType, grids, true);
  }

  /**
   * Creates a partitioner without a grid type.
   *
   * @param grids the grid envelopes
   * @param preserveUncontainedGeometries whether to route uncovered geometries to the overflow
   *     partition
   */
  public IndexedGridPartitioner(List<Envelope> grids, Boolean preserveUncontainedGeometries) {
    this(null, grids, preserveUncontainedGeometries);
  }

  /**
   * Creates a partitioner without a grid type that preserves uncontained geometries.
   *
   * @param grids the grid envelopes
   */
  public IndexedGridPartitioner(List<Envelope> grids) {
    this(null, grids);
  }

  /**
   * Returns the STRtree built over the grid envelopes.
   *
   * @return the index; its items are {@code Integer} positions into the grid list
   */
  public STRtree getIndex() {
    return index;
  }

  /**
   * Assigns a geometry to every grid whose envelope intersects the geometry's envelope.
   *
   * <p>When {@code preserveUncontainedGeometries} is true and no single grid covers the geometry's
   * envelope, the geometry is also assigned to the overflow partition, whose id is {@code
   * grids.size()}.
   *
   * @param spatialObject the geometry to place
   * @return an iterator over (partition id, geometry) pairs; empty when nothing matches and
   *     uncontained geometries are not preserved
   */
  @Override
  public Iterator<Tuple2<Integer, Geometry>> placeObject(Geometry spatialObject) throws Exception {
    final Envelope envelope = spatialObject.getEnvelopeInternal();
    final boolean preserve = preserveUncontainedGeometries;

    // The index only returns grids whose envelope intersects the query envelope, so every
    // candidate receives the geometry; no further intersects() test is needed.
    final List<?> candidates = index.query(envelope);

    // Grid ids are unique within the index, so a plain list cannot contain duplicates.
    final List<Tuple2<Integer, Geometry>> placements = new ArrayList<>(candidates.size() + 1);
    boolean contained = false;
    for (Object candidate : candidates) {
      final Integer gridId = (Integer) candidate;
      placements.add(new Tuple2<>(gridId, spatialObject));
      // Containment only matters when deciding whether to use the overflow partition.
      if (preserve && !contained && grids.get(gridId).covers(envelope)) {
        contained = true;
      }
    }

    if (preserve && !contained) {
      final int overflowContainerID = grids.size();
      placements.add(new Tuple2<>(overflowContainerID, spatialObject));
    }

    return placements.iterator();
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sedona.core.spatialPartitioning;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import junit.framework.TestCase;
import org.junit.Assert;
import org.junit.Test;
import org.locationtech.jts.geom.Coordinate;
import org.locationtech.jts.geom.Envelope;
import org.locationtech.jts.geom.Geometry;
import org.locationtech.jts.geom.GeometryFactory;
import scala.Tuple2;

/**
 * Tests for {@link IndexedGridPartitioner} using a 2x2 layout of 50x50 grids that together span
 * (0,0)-(100,100).
 */
public class IndexedGridPartitionerTest extends TestCase {

  /** Builds a fresh copy of the four test grids; a grid's list position is its partition id. */
  private List<Envelope> getGrids() {
    List<Envelope> grids = new ArrayList<>();
    grids.add(new Envelope(0, 50, 0, 50));
    grids.add(new Envelope(50, 100, 0, 50));
    grids.add(new Envelope(0, 50, 50, 100));
    grids.add(new Envelope(50, 100, 50, 100));
    return grids;
  }

  private IndexedGridPartitioner getPartitioner(Boolean preserveUncontainedGeometries) {
    return new IndexedGridPartitioner(getGrids(), preserveUncontainedGeometries);
  }

  /** Places a point at (x, y) and collects every (partition id, geometry) pair produced. */
  private List<Tuple2<Integer, Geometry>> placePoint(
      IndexedGridPartitioner partitioner, double x, double y) throws Exception {
    Geometry point = new GeometryFactory().createPoint(new Coordinate(x, y));
    Iterator<Tuple2<Integer, Geometry>> placements = partitioner.placeObject(point);

    List<Tuple2<Integer, Geometry>> collected = new ArrayList<>();
    placements.forEachRemaining(collected::add);
    return collected;
  }

  /** A point strictly inside grid 0 is placed in that grid only, with no overflow entry. */
  @Test
  public void testPlaceObjectPreserveContainedGeometries() throws Exception {
    List<Tuple2<Integer, Geometry>> resultList = placePoint(getPartitioner(true), 25, 25);

    Assert.assertFalse(resultList.isEmpty());
    Assert.assertEquals(1, resultList.size());
    Assert.assertEquals(0, (int) resultList.get(0)._1());
  }

  /** A point outside every grid goes to the overflow partition when preservation is enabled. */
  @Test
  public void testPlaceObjectPreservesUncontainedGeometries() throws Exception {
    List<Tuple2<Integer, Geometry>> resultList = placePoint(getPartitioner(true), -25, -25);

    Assert.assertEquals(1, resultList.size());
    // The overflow partition id is the number of grids.
    Assert.assertEquals(getGrids().size(), (int) resultList.get(0)._1());
  }

  /** A point outside every grid is dropped when preservation is disabled. */
  @Test
  public void testPlaceObjectDoesntPreserveUncontainedGeometries() throws Exception {
    List<Tuple2<Integer, Geometry>> resultList = placePoint(getPartitioner(false), -25, -25);
    Assert.assertTrue(resultList.isEmpty());
  }

  @Test
  public void testGetGrids() {
    IndexedGridPartitioner partitioner = getPartitioner(true);
    Assert.assertEquals(getGrids(), partitioner.getGrids());
  }

  /** The overflow partition is counted only when uncontained geometries are preserved. */
  @Test
  public void testNumPartitions() {
    IndexedGridPartitioner partitioner = getPartitioner(true);
    Assert.assertEquals(5, partitioner.numPartitions());

    partitioner = getPartitioner(false);
    Assert.assertEquals(4, partitioner.numPartitions());
  }

  /** Two partitioners built from equal grids and the same flag compare equal. */
  @Test
  public void testEquals() {
    IndexedGridPartitioner partitioner = getPartitioner(true);
    IndexedGridPartitioner otherPartitioner = new IndexedGridPartitioner(getGrids(), true);
    Assert.assertTrue(partitioner.equals(otherPartitioner));
  }
}

0 comments on commit 38b0fc9

Please sign in to comment.