Skip to content

Commit

Permalink
feat(linter): expand package checks across multiple ecosystems (#322)
Browse files Browse the repository at this point in the history
This PR enhances the linter by adding package existence checks for a
variety of ecosystems:
- crates.io
- npm
- NuGet
- RubyGems
- Packagist
- Pub
- Hackage
- Maven

To accommodate these additions, the `ecosystems.go` file has been
refactored, with code related to package existence checks moved to
`package_check.go` and version checks moved to `version_check.go`.

A current limitation is that validation of records describing malicious
packages may fail, because such packages are often removed from their
registries and can then no longer be queried via the registry API.

---------

Signed-off-by: Holly Gong <[email protected]>
  • Loading branch information
hogo6002 authored Feb 24, 2025
1 parent ab8a4f6 commit fa0a097
Show file tree
Hide file tree
Showing 4 changed files with 659 additions and 232 deletions.
262 changes: 30 additions & 232 deletions tools/osv-linter/internal/pkgchecker/ecosystems.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,34 @@ package pkgchecker

import (
"fmt"
"io"
"net/http"
"regexp"
"slices"
"strings"

"golang.org/x/mod/module"
"golang.org/x/mod/semver"

"github.com/ossf/osv-schema/linter/internal/faulttolerant"

pep440 "github.com/aquasecurity/go-pep440-version"

"github.com/tidwall/gjson"
)

// SupportedEcosystems lists the names of the ecosystems this package knows
// how to check. Ecosystem support is a work in progress, so the list is
// expected to grow.
var SupportedEcosystems = []string{
"Go",
"PyPI",
"crates.io",
"npm",
"NuGet",
"RubyGems",
"Packagist",
"Pub",
"Hackage",
"Maven",
}

// EcosystemBaseURLs maps ecosystems to their base API URLs.
//
// Keys are ecosystem names spelled exactly as in SupportedEcosystems. Values
// carry no trailing slash.
// NOTE(review): the Maven entry is a search endpoint rather than a
// per-package path prefix, so it presumably needs query parameters instead of
// a path suffix; confirm against the code that consumes this map.
var EcosystemBaseURLs = map[string]string{
"Go": "https://proxy.golang.org",
"PyPI": "https://pypi.org/pypi",
"crates.io": "https://crates.io/api/v1/crates",
"npm": "https://registry.npmjs.org",
"NuGet": "https://api.nuget.org/v3-flatcontainer",
"RubyGems": "https://rubygems.org/api/v1/gems",
"Packagist": "https://repo.packagist.org/p2",
"Pub": "https://pub.dev/api/packages",
"Hackage": "https://hackage.haskell.org/package",
"Maven": "https://search.maven.org/solrsearch/select",
}

// Dispatcher for ecosystem-specific package existence checking.
Expand All @@ -40,7 +48,7 @@ func ExistsInEcosystem(pkg string, ecosystem string) bool {
case "CRAN":
return true
case "crates.io":
return true
return existsInCrates(pkg)
case "Debian":
return true
case "GIT":
Expand All @@ -52,35 +60,35 @@ func ExistsInEcosystem(pkg string, ecosystem string) bool {
case "GSD":
return true
case "Hackage":
return true
return existsInHackage(pkg)
case "Hex":
return true
case "Kubernetes":
return true
case "Linux":
return true
case "Maven":
return true
return existsInMaven(pkg)
case "npm":
return true
return existsInNpm(pkg)
case "NuGet":
return true
return existsInNuget(pkg)
case "openSUSE":
return true
case "OSS-Fuzz":
return true
case "Packagist":
return true
return existsInPackagist(pkg)
case "Pub":
return true
return existsInPub(pkg)
case "PyPI":
return existsInPyPI(pkg)
case "Red Hat":
return true
case "Rocky Linux":
return true
case "RubyGems":
return true
return existsInRubyGems(pkg)
case "SUSE":
return true
case "SwiftURL":
Expand Down Expand Up @@ -175,213 +183,3 @@ func VersionsExistInEcosystem(pkg string, versions []string, ecosystem string) e
}
return fmt.Errorf("unsupported ecosystem: %s", ecosystem)
}

// existsInPyPI reports whether pkg exists in PyPI.
//
// It issues a HEAD request for the package's JSON metadata URL, built from the
// lowercased package name, and returns true only for a 200 OK response. A
// request error is reported as "does not exist".
func existsInPyPI(pkg string) bool {
	packageInstanceURL := fmt.Sprintf("https://pypi.org/pypi/%s/json", strings.ToLower(pkg))

	// This 404's for non-existent packages.
	resp, err := faulttolerant.Head(packageInstanceURL)
	if err != nil {
		return false
	}
	// net/http requires the caller to close the body of every successful
	// response, HEAD included, so that the connection can be reused.
	defer resp.Body.Close()

	return resp.StatusCode == http.StatusOK
}

// pypiNameSeparators matches the runs of "-", "_" and "." that are collapsed
// into a single "-" when normalizing a PyPI project name.
// https://packaging.python.org/en/latest/specifications/name-normalization/
var pypiNameSeparators = regexp.MustCompile(`[-_.]+`)

// versionsExistInPyPI confirms that all specified versions of a package exist
// in PyPI.
//
// The package name is normalized (separator runs collapsed to "-", then
// lowercased) before its JSON metadata is fetched. Versions are compared using
// PEP 440 equality rather than string equality. A requested version that does
// not parse as PEP 440 is reported as missing; a known version that does not
// parse is ignored.
//
// It returns nil when every version is found, a *MissingVersionsError listing
// the missing and known versions otherwise, or a plain error when the package
// metadata cannot be retrieved.
func versionsExistInPyPI(pkg string, versions []string) error {
	pkgNormalized := strings.ToLower(pypiNameSeparators.ReplaceAllString(pkg, "-"))
	packageInstanceURL := fmt.Sprintf("https://pypi.org/pypi/%s/json", pkgNormalized)

	// This 404's for non-existent packages.
	resp, err := faulttolerant.Get(packageInstanceURL)
	if err != nil {
		return fmt.Errorf("unable to validate package: %w", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("unable to validate package: %q for %s", resp.Status, packageInstanceURL)
	}

	respJSON, err := io.ReadAll(resp.Body)
	if err != nil {
		return fmt.Errorf("unable to retrieve JSON for %q: %w", pkg, err)
	}

	// Collect every known release once: the raw strings for error reporting,
	// and the parsed forms so each one is parsed a single time rather than
	// once per requested version.
	versionsInPyPI := []string{}
	parsedKnown := []pep440.Version{}
	gjson.GetBytes(respJSON, "releases.@keys").ForEach(func(_, value gjson.Result) bool {
		raw := value.String()
		versionsInPyPI = append(versionsInPyPI, raw)
		if parsed, parseErr := pep440.Parse(raw); parseErr == nil {
			parsedKnown = append(parsedKnown, parsed)
		}
		return true // keep iterating.
	})

	// Determine which referenced versions are missing.
	var versionsMissing []string
	for _, versionToCheckFor := range versions {
		wanted, parseErr := pep440.Parse(versionToCheckFor)
		if parseErr != nil {
			versionsMissing = append(versionsMissing, versionToCheckFor)
			continue
		}
		found := slices.ContainsFunc(parsedKnown, func(known pep440.Version) bool {
			return known.Equal(wanted)
		})
		if !found {
			versionsMissing = append(versionsMissing, versionToCheckFor)
		}
	}
	if len(versionsMissing) > 0 {
		return &MissingVersionsError{Package: pkg, Ecosystem: "PyPI", Missing: versionsMissing, Known: versionsInPyPI}
	}

	return nil
}

// existsInGo reports whether pkg exists as a Go module.
//
// The pseudo-packages "stdlib" and "toolchain" always exist. Any other path is
// looked up with a HEAD request against the Go module proxy's version list
// endpoint, and counts as existing only when the proxy answers 200 OK. A
// request error is reported as "does not exist".
func existsInGo(pkg string) bool {
	// Of course the Go runtime exists :-)
	if pkg == "stdlib" || pkg == "toolchain" {
		return true
	}

	// The Go Module Proxy seems to require package names to be lowercase.
	// GitHub URLs are known to be case-insensitive.
	// NOTE(review): the proxy protocol case-encodes uppercase letters, so
	// non-GitHub paths containing capitals may need escaping; confirm.
	if strings.HasPrefix(pkg, "github.com/") {
		pkg = strings.ToLower(pkg)
	}

	packageInstanceURL := fmt.Sprintf("https://proxy.golang.org/%s/@v/list", pkg)

	// This 404's for non-existent packages.
	resp, err := faulttolerant.Head(packageInstanceURL)
	if err != nil {
		return false
	}
	// net/http requires the caller to close the body of every successful
	// response, HEAD included, so that the connection can be reused.
	defer resp.Body.Close()

	return resp.StatusCode == http.StatusOK
}

// versionsExistInGo confirms that all specified versions of a package exist in
// Go.
//
// For the pseudo-packages "stdlib" and "toolchain" the check is delegated to
// goVersionsExist. For any other module the version list is fetched from the
// Go module proxy. Versions are expected without the leading "v"; it is added
// here before comparison. Pseudo-versions are skipped rather than checked.
//
// It returns nil when every version is found, or when the proxy lists no
// versions at all. It returns a *MissingVersionsError listing the missing and
// known versions otherwise, or a plain error when the list cannot be
// retrieved.
func versionsExistInGo(pkg string, versions []string) error {
	if pkg == "stdlib" || pkg == "toolchain" {
		return goVersionsExist(versions)
	}

	// The Go Module Proxy seems to require package names to be lowercase.
	// GitHub URLs are known to be case-insensitive.
	// NOTE(review): the proxy protocol case-encodes uppercase letters, so
	// non-GitHub paths containing capitals may need escaping; confirm.
	if strings.HasPrefix(pkg, "github.com/") {
		pkg = strings.ToLower(pkg)
	}

	packageInstanceURL := fmt.Sprintf("https://proxy.golang.org/%s/@v/list", pkg)

	// This 404's for non-existent packages.
	resp, err := faulttolerant.Get(packageInstanceURL)
	if err != nil {
		return fmt.Errorf("unable to validate package: %w", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("unable to validate package: %q for %s", resp.Status, packageInstanceURL)
	}

	// Load the known versions from the list provided.
	respBytes, err := io.ReadAll(resp.Body)
	if err != nil {
		return fmt.Errorf("unable to retrieve versions for %q: %w", pkg, err)
	}

	// It seems that an empty version set is plausible. Unreleased?
	// e.g. github.com/nanobox-io/golang-nanoauth
	listing := strings.TrimSpace(string(respBytes))
	if listing == "" {
		// TODO: This is warning-level worthy if warnings were a thing...
		return nil
	}
	versionsInGo := strings.Split(listing, "\n")

	// Determine which referenced versions are missing.
	var versionsMissing []string
	for _, versionToCheckFor := range versions {
		prefixed := "v" + versionToCheckFor
		// First, detect pseudo-version and skip it.
		if module.IsPseudoVersion(prefixed) {
			// TODO: Try mapping the pseudo-version to a base version and
			// checking for that instead of skipping.
			continue
		}
		if slices.Contains(versionsInGo, semver.Canonical(prefixed)) {
			continue
		}
		versionsMissing = append(versionsMissing, versionToCheckFor)
	}
	if len(versionsMissing) > 0 {
		return &MissingVersionsError{Package: pkg, Ecosystem: "Go", Missing: versionsMissing, Known: versionsInGo}
	}

	return nil
}

// goVersionsExist confirms that all specified versions of Go exist.
//
// The list of Go releases is fetched as JSON from go.dev. Versions are
// expected without the "go" prefix (e.g. "1.21.0"); the prefix is added here
// before comparison. A version whose semver prerelease is exactly "-0" is also
// accepted when either its major.minor form or its form without the "-0"
// suffix is a known release.
//
// It returns nil when every version is found, and an error naming the missing
// and known versions, or describing the retrieval failure, otherwise.
func goVersionsExist(versions []string) error {
	URL := "https://go.dev/dl/?mode=json&include=all"

	resp, err := faulttolerant.Get(URL)
	if err != nil {
		return fmt.Errorf("unable to validate Go versions: %w", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("unable to validate package: %q for %s", resp.Status, URL)
	}

	// Parse the known versions from the JSON.
	respJSON, err := io.ReadAll(resp.Body)
	if err != nil {
		return fmt.Errorf("unable to retrieve JSON for Go: %w", err)
	}
	// Fetch all known versions of Go.
	goVersions := []string{}
	gjson.GetBytes(respJSON, "#.version").ForEach(func(_, value gjson.Result) bool {
		goVersions = append(goVersions, value.String())
		return true // keep iterating.
	})

	// Determine which referenced versions are missing.
	var versionsMissing []string
	for _, versionToCheckFor := range versions {
		if slices.Contains(goVersions, "go"+versionToCheckFor) {
			continue
		}
		prefixed := "v" + versionToCheckFor
		if semver.Prerelease(prefixed) == "-0" {
			// Coerce "1.16.0-0" to "1.16".
			if slices.Contains(goVersions, "go"+strings.TrimPrefix(semver.MajorMinor(prefixed), "v")) {
				continue
			}
			// Coerce "1.21.0-0" to "1.21.0". The prerelease is known to be
			// "-0" in this branch, so trimming that literal is equivalent to
			// trimming the computed prerelease.
			if slices.Contains(goVersions, "go"+strings.TrimSuffix(versionToCheckFor, "-0")) {
				continue
			}
		}
		versionsMissing = append(versionsMissing, versionToCheckFor)
	}
	if len(versionsMissing) > 0 {
		return fmt.Errorf("failed to find %+v for Go in %+v", versionsMissing, goVersions)
	}

	return nil
}
Loading

0 comments on commit fa0a097

Please sign in to comment.