Skip to content

Commit

Permalink
fix(model): Rework purl conversion according to the specs
Browse files Browse the repository at this point in the history
Implement the pseudo-algorithm described at [1]. Most importantly, '/'
characters in namespaces are no longer escaped (see also the lengthy
discussion at [2]), qualifier key names are lower-cased, and qualifiers
are sorted for comparability.

[1]: https://github.com/package-url/purl-spec/blob/master/PURL-SPECIFICATION.rst#how-to-build-purl-string-from-its-components
[2]: package-url/purl-spec#176

Signed-off-by: Sebastian Schuberth <[email protected]>
  • Loading branch information
sschuberth committed Oct 23, 2024
1 parent 036bcb4 commit 6978919
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 21 deletions.
3 changes: 1 addition & 2 deletions model/src/main/kotlin/utils/PurlExtensions.kt
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,8 @@ fun String.toProvenance(): Provenance {
"download_url=" in extras -> {
val encodedUrl = getQualifierValue("download_url")

val percentEncodedColon = "%3A"
val checksum = getQualifierValue("checksum")
val (algorithm, value) = checksum.split(percentEncodedColon, limit = 2)
val (algorithm, value) = checksum.split(':', limit = 2)

ArtifactProvenance(
sourceArtifact = RemoteArtifact(
Expand Down
37 changes: 27 additions & 10 deletions model/src/main/kotlin/utils/PurlUtils.kt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

package org.ossreviewtoolkit.model.utils

import org.ossreviewtoolkit.model.HashAlgorithm
import org.ossreviewtoolkit.utils.common.percentEncode

/**
Expand Down Expand Up @@ -92,28 +93,44 @@ internal fun createPurl(
): String =
buildString {
append("pkg:")
append(type)
append(type.lowercase())
append('/')

if (namespace.isNotEmpty()) {
append(namespace.trim('/').split('/').joinToString("/") { it.percentEncode() })
append('/')
append(namespace.percentEncode())
append(name.trim('/').percentEncode())
} else {
append(name.percentEncode())
}

append('/')
append(name.percentEncode())
if (version.isNotEmpty()) {
append('@')

append('@')
append(version.percentEncode())
// See https://github.com/package-url/purl-spec/blob/master/PURL-SPECIFICATION.rst#character-encoding which
// says "the '#', '?', '@' and ':' characters must NOT be encoded when used as separators".
val isChecksum = HashAlgorithm.VERIFIABLE.any { version.startsWith("${it.name.lowercase()}:") }
if (isChecksum) append(version) else append(version.percentEncode())
}

qualifiers.onEachIndexed { index, entry ->
qualifiers.filterValues { it.isNotEmpty() }.toSortedMap().onEachIndexed { index, entry ->
if (index == 0) append("?") else append("&")
append(entry.key.percentEncode())

val key = entry.key.lowercase()
append(key)

append("=")
append(entry.value.percentEncode())

// See https://github.com/package-url/purl-spec/blob/master/PURL-SPECIFICATION.rst#known-qualifiers-keyvalue-pairs.
if (key in KNOWN_KEYS) append(entry.value) else append(entry.value.percentEncode())
}

if (subpath.isNotEmpty()) {
val value = subpath.split('/').joinToString("/", prefix = "#") { it.percentEncode() }
val value = subpath.trim('/').split('/')
.filter { it.isNotEmpty() && it != "." && it != ".." }
.joinToString("/", prefix = "#") { it.percentEncode() }
append(value)
}
}

/**
 * The lower-case qualifier keys whose values are appended to the purl as-is instead of being percent-encoded.
 */
private val KNOWN_KEYS: Set<String> = setOf(
    "repository_url",
    "download_url",
    "vcs_url",
    "file_name",
    "checksum"
)
16 changes: 8 additions & 8 deletions model/src/test/kotlin/utils/PurlExtensionsTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,10 @@ class PurlExtensionsTest : WordSpec({
purl shouldBe "pkg:generic/name@version"
}

"percent-encode namespaces with segments" {
val purl = Identifier("generic", "name/space", "name", "version").toPurl()
"percent-encode namespace segments" {
val purl = Identifier("generic", "name space/with spaces", "name", "version").toPurl()

purl shouldBe "pkg:generic/name%2Fspace/name@version"
purl shouldBe "pkg:generic/name%20space/with%20spaces/name@version"
}

"percent-encode the name" {
Expand Down Expand Up @@ -104,8 +104,8 @@ class PurlExtensionsTest : WordSpec({
val purl = id.toPurl(extras.qualifiers, extras.subpath)

purl shouldBe "pkg:maven/com.example/[email protected]?" +
"download_url=https%3A%2F%2Fexample.com%2Fsources.zip&" +
"checksum=md5%3Addce269a1e3d054cae349621c198dd52"
"checksum=md5:ddce269a1e3d054cae349621c198dd52&" +
"download_url=https://example.com/sources.zip"
purl.toProvenance() shouldBe provenance
}

Expand All @@ -125,10 +125,10 @@ class PurlExtensionsTest : WordSpec({
val purl = id.toPurl(extras.qualifiers, extras.subpath)

purl shouldBe "pkg:maven/com.example/[email protected]?" +
"vcs_type=Git&" +
"vcs_url=https%3A%2F%2Fgithub.com%2Fapache%2Fcommons-text.git&" +
"resolved_revision=7643b12421100d29fd2b78053e77bcb04a251b2e&" +
"vcs_revision=7643b12421100d29fd2b78053e77bcb04a251b2e&" +
"resolved_revision=7643b12421100d29fd2b78053e77bcb04a251b2e" +
"vcs_type=Git&" +
"vcs_url=https://github.com/apache/commons-text.git" +
"#subpath"
purl.toProvenance() shouldBe provenance
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ packages:
revision: "eb51f949cdd0c9d88abba9ce79d37eb7ea1231d0"
path: ""
- id: "Swift::github.com/apple/swift-crypto:"
purl: "pkg:swift/github.com%2Fapple%2Fswift-crypto@"
purl: "pkg:swift/github.com%2Fapple%2Fswift-crypto"
declared_licenses: []
declared_licenses_processed: {}
description: ""
Expand Down

0 comments on commit 6978919

Please sign in to comment.