Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

prefer non-GML namespace on multiple candidates for property #118

Merged
merged 2 commits into from
Nov 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@

/*
* Copyright (c) 2024 wetransform GmbH
*
* All rights reserved. This program and the accompanying materials are made
* available under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation, either version 3 of the License,
* or (at your option) any later version.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution. If not, see <http://www.gnu.org/licenses/>.
*/
package eu.esdihumboldt.hale.common.schema;

public interface SchemaConstants {

/**
* The core part of the GML namespace that is independent of the version number
* (and is the namespace of GML versions up to 3.1.1).
*/
public static final String GML_NAMESPACE_CORE = "http://www.opengis.net/gml";

}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
Application schema declaring a single feature type, ING-4543, in the
planAtlas-rp target namespace. The type extends gml:AbstractFeatureType
from the imported GML 3.2 namespace.
-->
<schema xmlns="http://www.w3.org/2001/XMLSchema" xmlns:gml="http://www.opengis.net/gml/3.2" xmlns:planAtlas-="https://www.geoportal-raumordnung-bw.de/planAtlas-rp" elementFormDefault="qualified" targetNamespace="https://www.geoportal-raumordnung-bw.de/planAtlas-rp">
<import namespace="http://www.opengis.net/gml/3.2" schemaLocation="http://schemas.opengis.net/gml/3.2.1/gml.xsd"/>
<!-- Global feature element; substitutable for gml:AbstractFeature. -->
<element name="ING-4543" substitutionGroup="gml:AbstractFeature" type="planAtlas-:ING-4543"/>
<complexType name="ING-4543">
<complexContent>
<extension base="gml:AbstractFeatureType">
<!-- All elements below are optional (minOccurs 0), single-valued and not nillable. -->
<sequence>
<element maxOccurs="1" minOccurs="0" name="bereich" nillable="false" type="integer"/>
<element maxOccurs="1" minOccurs="0" name="oa_nr" nillable="false" type="integer"/>
<element maxOccurs="1" minOccurs="0" name="oa_nr_t" nillable="false" type="string"/>
<element maxOccurs="1" minOccurs="0" name="klasse" nillable="false" type="integer"/>
<element maxOccurs="1" minOccurs="0" name="klasse_t" nillable="false" type="string"/>
<element maxOccurs="1" minOccurs="0" name="bindung" nillable="false" type="integer"/>
<element maxOccurs="1" minOccurs="0" name="bindung_t" nillable="false" type="string"/>
<element maxOccurs="1" minOccurs="0" name="status" nillable="false" type="integer"/>
<element maxOccurs="1" minOccurs="0" name="status_t" nillable="false" type="string"/>
<element maxOccurs="1" minOccurs="0" name="kategorie" nillable="false" type="integer"/>
<element maxOccurs="1" minOccurs="0" name="kategori_t" nillable="false" type="string"/>
<element maxOccurs="1" minOccurs="0" name="erst_id" nillable="false" type="integer"/>
<element maxOccurs="1" minOccurs="0" name="erst_id_t" nillable="false" type="string"/>
<!-- NOTE(review): "name" presumably collides by local name with a property inherited from the GML base type; confirm against the GML schema. -->
<element maxOccurs="1" minOccurs="0" name="name" nillable="false" type="string"/>
<element maxOccurs="1" minOccurs="0" name="bemerkung" nillable="false" type="string"/>
<element maxOccurs="1" minOccurs="0" name="inkraft" nillable="false" type="date"/>
<element maxOccurs="1" minOccurs="0" name="massstab" nillable="false" type="integer"/>
<element maxOccurs="1" minOccurs="0" name="nr" nillable="false" type="integer"/>
<!-- NOTE(review): "id" presumably collides by local name with the GML id attribute of the base type; confirm against the GML schema. -->
<element maxOccurs="1" minOccurs="0" name="id" nillable="false" type="string"/>
<!-- The only geometry property of the type. -->
<element maxOccurs="1" minOccurs="0" name="geometrie" nillable="false" type="gml:MultiCurvePropertyType"/>
</sequence>
</extension>
</complexContent>
</complexType>
</schema>
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
*/
package eu.esdihumboldt.hale.io.geopackage.internal

import static eu.esdihumboldt.hale.common.schema.SchemaConstants.GML_NAMESPACE_CORE

import groovy.transform.CompileStatic

import java.sql.SQLException
Expand Down Expand Up @@ -102,6 +104,16 @@ class TableInstanceBuilder {
cand = candidates.find { it.getConstraint(Cardinality).getMinOccurs() > 0 && !it.getConstraint(NillableFlag).isEnabled() }
}

if (cand == null && candidates.size() == 2) {
// prefer property that is not in GML namespace, if the other is in a GML namespace
if (candidates[0].name.namespaceURI.startsWith(GML_NAMESPACE_CORE)) {
cand = candidates[1]
}
else if (candidates[1].name.namespaceURI.startsWith(GML_NAMESPACE_CORE)) {
cand = candidates[0]
}
}

if (cand == null) {
// just pick the first one
cand = candidates[0]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import groovy.transform.TypeCheckingMode

import java.util.function.Consumer

import javax.xml.namespace.QName

import org.junit.Test
import org.locationtech.jts.geom.Geometry

Expand All @@ -36,6 +38,7 @@ import eu.esdihumboldt.hale.common.schema.geometry.GeometryProperty
import eu.esdihumboldt.hale.common.schema.model.Schema
import eu.esdihumboldt.hale.common.test.TestUtil
import eu.esdihumboldt.util.test.AbstractPlatformTest
import io.qameta.allure.Link

/**
* Tests loading GeoPackage instances.
Expand Down Expand Up @@ -261,4 +264,64 @@ class GeopackageInstanceReaderTest extends AbstractPlatformTest {
.isNotNull()
} as Consumer)
}

/**
 * Reads GeoPackage data against an XML schema (use case: rewrite GeoPackage as GML)
 * and checks that the schema's own "name" and "id" elements are populated rather
 * than the GML id attribute.
 */
@CompileStatic(TypeCheckingMode.SKIP)
@Link(value = "ING-4543", type = "JIRA")
@Test
public void testReadInstancesXsdGmlConflict() {
	GeopackageApiTest.withTestFile("testdata/gml-conflict.gpkg") { File file ->
		def schemaLocation = getClass().getClassLoader().getResource("testdata/gml-conflict.xsd").toURI()
		Schema xmlSchema = TestUtil.loadSchema(schemaLocation)

		InstanceCollection instances = loadInstances(xmlSchema, file)

		// the collection must exist, know its size and hold three instances
		assertNotNull(instances)
		assertTrue(instances.hasSize())
		assertEquals(3, instances.size())

		def list = instances.toList()
		def targetNamespace = 'https://www.geoportal-raumordnung-bw.de/planAtlas-rp'

		def expectedNames = [
			'Allgäubahn',
			'Bodensee-Gürtelbahn',
			'Donaubahn'
		].toArray()

		// names are reachable through the accessor
		assertThat(new InstanceAccessor(list).name.values())
				.containsExactlyInAnyOrder(expectedNames)

		// ... and they are stored in the property from the schema's target namespace
		def nameProperty = new QName(targetNamespace, 'name')
		def names = list.collect { instance -> instance.getProperty(nameProperty)[0] }.toList()
		assertThat(names)
				.containsExactlyInAnyOrder(expectedNames)

		// the id element, declared in the schema in addition to the GML id attribute, carries the ids
		def idProperty = new QName(targetNamespace, 'id')
		def ids = list.collect { instance -> instance.getProperty(idProperty)[0] }.toList()
		assertThat(ids)
				.containsExactlyInAnyOrder('ln_8164_843_1', 'ln_8164_843_2', 'ln_8164_843_3')

		// no instance may carry a value for the GML id attribute
		def gmlIdProperty = new QName('http://www.opengis.net/gml/3.2', 'id')
		def gmlIds = list.collect { instance ->
			def values = instance.getProperty(gmlIdProperty)
			values ? values[0] : null
		}.findAll()
		assertThat(gmlIds)
				.isEmpty()
	}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
UTF-8
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
PROJCS["ETRS89 / UTM zone 32N", GEOGCS["ETRS89", DATUM["European Terrestrial Reference System 1989", SPHEROID["GRS 1980", 6378137.0, 298.257222101, AUTHORITY["EPSG","7019"]], AUTHORITY["EPSG","6258"]], PRIMEM["Greenwich", 0.0, AUTHORITY["EPSG","8901"]], UNIT["degree", 0.017453292519943295], AXIS["Geodetic latitude", NORTH], AXIS["Geodetic longitude", EAST], AUTHORITY["EPSG","4258"]], PROJECTION["Transverse_Mercator", AUTHORITY["EPSG","9807"]], PARAMETER["central_meridian", 9.0], PARAMETER["latitude_of_origin", 0.0], PARAMETER["scale_factor", 0.9996], PARAMETER["false_easting", 500000.0], PARAMETER["false_northing", 0.0], UNIT["m", 1.0], AXIS["Easting", EAST], AXIS["Northing", NORTH], AUTHORITY["EPSG","25832"]]
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
Application schema declaring a single feature type,
ln_8164_schiene_trasse_ausbau, in the planAtlas-rp target namespace.
The type extends gml:AbstractFeatureType from the imported GML 3.2 namespace.
-->
<schema xmlns="http://www.w3.org/2001/XMLSchema" xmlns:gml="http://www.opengis.net/gml/3.2" xmlns:planAtlas-="https://www.geoportal-raumordnung-bw.de/planAtlas-rp" elementFormDefault="qualified" targetNamespace="https://www.geoportal-raumordnung-bw.de/planAtlas-rp">
<import namespace="http://www.opengis.net/gml/3.2" schemaLocation="http://schemas.opengis.net/gml/3.2.1/gml.xsd"/>
<!-- Global feature element; substitutable for gml:AbstractFeature. -->
<element name="ln_8164_schiene_trasse_ausbau" substitutionGroup="gml:AbstractFeature" type="planAtlas-:ln_8164_schiene_trasse_ausbau"/>
<complexType name="ln_8164_schiene_trasse_ausbau">
<complexContent>
<extension base="gml:AbstractFeatureType">
<!-- All elements below are optional (minOccurs 0), single-valued and not nillable. -->
<sequence>
<element maxOccurs="1" minOccurs="0" name="bereich" nillable="false" type="integer"/>
<element maxOccurs="1" minOccurs="0" name="oa_nr" nillable="false" type="integer"/>
<element maxOccurs="1" minOccurs="0" name="oa_nr_t" nillable="false" type="string"/>
<element maxOccurs="1" minOccurs="0" name="klasse" nillable="false" type="integer"/>
<element maxOccurs="1" minOccurs="0" name="klasse_t" nillable="false" type="string"/>
<element maxOccurs="1" minOccurs="0" name="bindung" nillable="false" type="integer"/>
<element maxOccurs="1" minOccurs="0" name="bindung_t" nillable="false" type="string"/>
<element maxOccurs="1" minOccurs="0" name="status" nillable="false" type="integer"/>
<element maxOccurs="1" minOccurs="0" name="status_t" nillable="false" type="string"/>
<element maxOccurs="1" minOccurs="0" name="kategorie" nillable="false" type="integer"/>
<element maxOccurs="1" minOccurs="0" name="kategori_t" nillable="false" type="string"/>
<element maxOccurs="1" minOccurs="0" name="erst_id" nillable="false" type="integer"/>
<element maxOccurs="1" minOccurs="0" name="erst_id_t" nillable="false" type="string"/>
<!-- NOTE(review): "name" presumably collides by local name with a property inherited from the GML base type; confirm against the GML schema. -->
<element maxOccurs="1" minOccurs="0" name="name" nillable="false" type="string"/>
<element maxOccurs="1" minOccurs="0" name="bemerkung" nillable="false" type="string"/>
<element maxOccurs="1" minOccurs="0" name="inkraft" nillable="false" type="date"/>
<element maxOccurs="1" minOccurs="0" name="massstab" nillable="false" type="integer"/>
<element maxOccurs="1" minOccurs="0" name="nr" nillable="false" type="integer"/>
<!-- NOTE(review): "id" presumably collides by local name with the GML id attribute of the base type; confirm against the GML schema. -->
<element maxOccurs="1" minOccurs="0" name="id" nillable="false" type="string"/>
<!-- The only geometry property of the type. -->
<element maxOccurs="1" minOccurs="0" name="geometrie" nillable="false" type="gml:MultiCurvePropertyType"/>
</sequence>
</extension>
</complexContent>
</complexType>
</schema>
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
*/
package eu.esdihumboldt.hale.io.shp.reader.internal;

import static eu.esdihumboldt.hale.common.schema.SchemaConstants.GML_NAMESPACE_CORE;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
Expand Down Expand Up @@ -183,12 +185,31 @@ private Instance createInstance(TypeDefinition type, SimpleFeature feature) {
// the source property's name
propertyName = candidates.get(0).getName();
}
else if (candidates.size() == 2) {
// handle special case:
// if one candidate is from GML namespace, choose the other
if (candidates.get(0).getName().getNamespaceURI()
.startsWith(GML_NAMESPACE_CORE)) {
propertyName = candidates.get(1).getName();
}
else if (candidates.get(1).getName().getNamespaceURI()
.startsWith(GML_NAMESPACE_CORE)) {
propertyName = candidates.get(0).getName();
}
}

if (propertyName == null) {
if (!missingProperties.contains(lookForPropertyName)) {
log.warn("Discarding values of property "
+ lookForPropertyName.getLocalPart()
+ " as it is not contained in the schema type.");
if (candidates.isEmpty()) {
log.warn("Discarding values of property "
+ lookForPropertyName.getLocalPart()
+ " as it is not contained in the schema type.");
}
else {
log.warn("Discarding values of property "
+ lookForPropertyName.getLocalPart()
+ " as there is no unique match in the schema type.");
}
missingProperties.add(lookForPropertyName);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ import java.nio.charset.StandardCharsets
import java.nio.file.Files
import java.util.function.Consumer

import javax.xml.namespace.QName

import org.apache.commons.io.IOUtils
import org.junit.Test
import org.locationtech.jts.geom.Geometry
Expand All @@ -39,6 +41,8 @@ import eu.esdihumboldt.hale.common.schema.model.Schema
import eu.esdihumboldt.hale.common.test.TestUtil
import eu.esdihumboldt.hale.io.shp.reader.internal.ShapeInstanceReader
import eu.esdihumboldt.util.test.AbstractPlatformTest
import io.qameta.allure.Issue
import io.qameta.allure.Link

/**
* Tests for reading Shapefiles.
Expand Down Expand Up @@ -204,6 +208,66 @@ class ShapeInstanceReaderTest extends AbstractPlatformTest {
} as Consumer)
}

/**
 * Reads Shapefile instances against an XML schema.
 *
 * The XSD defines its own "name" element, which is the one that should be populated,
 * while the GML base type also defines a "name" property. Originally neither property
 * was populated, because no unique match could be identified.
 */
@Test
@Link(value = "ING-4543", type = "JIRA")
@CompileStatic(TypeCheckingMode.SKIP)
void testReadXsdInstancesName() {
	def schemaLocation = getClass().getClassLoader().getResource("testdata/shape-xsd-name/shape.xsd").toURI()
	Schema xmlSchema = TestUtil.loadSchema(schemaLocation)

	def dataLocation = getClass().getClassLoader().getResource("testdata/shape-xsd-name/shape.shp").toURI()
	InstanceCollection instances = loadInstances(xmlSchema, dataLocation)

	assertNotNull(instances)
	List<Instance> list = instances.toList()

	// three instances are expected
	assertThat(list).hasSize(3)

	def targetNamespace = 'https://www.geoportal-raumordnung-bw.de/planAtlas-rp'

	def expectedNames = [
		'Allgäubahn',
		'Bodensee-Gürtelbahn',
		'Donaubahn'
	].toArray()

	// names are reachable through the accessor
	assertThat(new InstanceAccessor(list).name.values())
			.containsExactlyInAnyOrder(expectedNames)

	// ... and they are stored in the property from the schema's target namespace
	def nameProperty = new QName(targetNamespace, 'name')
	def names = list.collect { instance -> instance.getProperty(nameProperty)[0] }.toList()
	assertThat(names)
			.containsExactlyInAnyOrder(expectedNames)

	// the id element, declared in the schema in addition to the GML id attribute, carries the ids
	def idProperty = new QName(targetNamespace, 'id')
	def ids = list.collect { instance -> instance.getProperty(idProperty)[0] }.toList()
	assertThat(ids)
			.containsExactlyInAnyOrder('ln_8164_843_1', 'ln_8164_843_2', 'ln_8164_843_3')

	// no instance may carry a value for the GML id attribute
	def gmlIdProperty = new QName('http://www.opengis.net/gml/3.2', 'id')
	def gmlIds = list.collect { instance ->
		def values = instance.getProperty(gmlIdProperty)
		values ? values[0] : null
	}.findAll()
	assertThat(gmlIds)
			.isEmpty()
}

// helpers

/**
Expand Down