From 1857d08a05109fd80d2443fff0f5ecda76cc3b89 Mon Sep 17 00:00:00 2001 From: Holly Gong Date: Tue, 17 Dec 2024 16:38:54 +1100 Subject: [PATCH 1/4] feat(linter): expand package checks across multiple ecosystems Signed-off-by: Holly Gong --- .../internal/pkgchecker/ecosystems.go | 260 ++---------------- .../internal/pkgchecker/package_check.go | 100 +++++++ .../internal/pkgchecker/package_check_test.go | 260 ++++++++++++++++++ .../internal/pkgchecker/version_check.go | 190 +++++++++++++ 4 files changed, 579 insertions(+), 231 deletions(-) create mode 100644 tools/osv-linter/internal/pkgchecker/package_check.go create mode 100644 tools/osv-linter/internal/pkgchecker/package_check_test.go create mode 100644 tools/osv-linter/internal/pkgchecker/version_check.go diff --git a/tools/osv-linter/internal/pkgchecker/ecosystems.go b/tools/osv-linter/internal/pkgchecker/ecosystems.go index ed6beea..1740926 100644 --- a/tools/osv-linter/internal/pkgchecker/ecosystems.go +++ b/tools/osv-linter/internal/pkgchecker/ecosystems.go @@ -2,26 +2,34 @@ package pkgchecker import ( "fmt" - "io" - "net/http" - "regexp" - "slices" - "strings" - - "golang.org/x/mod/module" - "golang.org/x/mod/semver" - - "github.com/ossf/osv-schema/linter/internal/faulttolerant" - - pep440 "github.com/aquasecurity/go-pep440-version" - - "github.com/tidwall/gjson" ) // Ecosystem support is a work in progress. var SupportedEcosystems = []string{ "Go", "PyPI", + "crates.io", + "npm", + "NuGet", + "RubyGems", + "Packagist", + "Pub", + "Hackage", + "Maven", +} + +// EcosystemBaseURLs maps ecosystems to their base API URLs. +var EcosystemBaseURLs = map[string]string{ + "Go": "https://proxy.golang.org", + "PyPI": "https://pypi.org/pypi", + "crates.io": "https://crates.io/api/v1/crates", + "npm": "https://registry.npmjs.org", + "NuGet": "https://api.nuget.org/v3-flatcontainer", + "RubyGems": "https://rubygems.org/api/v1/gems", + "Packagist": "https://repo.packagist.org/p2", + "Pub": "https://pub.dev/api/packages", + "Hackage": "https://hackage.haskell.org/package", + "Maven": "https://search.maven.org/solrsearch/select", } // Dispatcher for ecosystem-specific package existence checking. @@ -40,7 +48,7 @@ func ExistsInEcosystem(pkg string, ecosystem string) bool { case "CRAN": return true case "crates.io": - return true + return existsInCrates(pkg) case "Debian": return true case "GIT": @@ -52,7 +60,7 @@ func ExistsInEcosystem(pkg string, ecosystem string) bool { case "GSD": return true case "Hackage": - return true + return existsInHackage(pkg) case "Hex": return true case "Linux": @@ -60,17 +68,17 @@ func ExistsInEcosystem(pkg string, ecosystem string) bool { case "Maven": return true case "npm": - return true + return existsInNpm(pkg) case "NuGet": - return true + return existsInNuget(pkg) case "openSUSE": return true case "OSS-Fuzz": return true case "Packagist": - return true + return existsInPackagist(pkg) case "Pub": - return true + return existsInPub(pkg) case "PyPI": return existsInPyPI(pkg) case "Red Hat": @@ -78,7 +86,7 @@ func ExistsInEcosystem(pkg string, ecosystem string) bool { case "Rocky Linux": return true case "RubyGems": - return true + return existsInRubyGems(pkg) case "SUSE": return true case "SwiftURL": @@ -173,213 +181,3 @@ func VersionsExistInEcosystem(pkg string, versions []string, ecosystem string) e } return fmt.Errorf("unsupported ecosystem: %s", ecosystem) } - -// Validate the existence of a package in PyPI. -func existsInPyPI(pkg string) bool { - packageInstanceURL := fmt.Sprintf("https://pypi.org/pypi/%s/json", strings.ToLower(pkg)) - - // This 404's for non-existent packages. - resp, err := faulttolerant.Head(packageInstanceURL) - if err != nil { - return false - } - - return resp.StatusCode == http.StatusOK -} - -// Confirm that all specified versions of a package exist in PyPI. -func versionsExistInPyPI(pkg string, versions []string) error { - // https://packaging.python.org/en/latest/specifications/name-normalization/ - pythonNormalizationRegex := regexp.MustCompile(`[-_.]+`) - pkgNormalized := strings.ToLower(pythonNormalizationRegex.ReplaceAllString(pkg, "-")) - packageInstanceURL := fmt.Sprintf("https://pypi.org/pypi/%s/json", pkgNormalized) - - // This 404's for non-existent packages. - resp, err := faulttolerant.Get(packageInstanceURL) - if err != nil { - return fmt.Errorf("unable to validate package: %v", err) - } - defer resp.Body.Close() - if resp.StatusCode != http.StatusOK { - return fmt.Errorf("unable to validate package: %q for %s", resp.Status, packageInstanceURL) - } - - // Parse the known versions from the JSON. - respJSON, err := io.ReadAll(resp.Body) - if err != nil { - return fmt.Errorf("unable to retrieve JSON for %q: %v", pkg, err) - } - // Fetch all known versions of package. - versionsInPyPy := []string{} - releases := gjson.GetBytes(respJSON, "releases.@keys") - releases.ForEach(func(key, value gjson.Result) bool { - versionsInPyPy = append(versionsInPyPy, value.String()) - return true // keep iterating. - }) - // Determine which referenced versions are missing. - versionsMissing := []string{} - for _, versionToCheckFor := range versions { - versionFound := false - vc, err := pep440.Parse(versionToCheckFor) - if err != nil { - versionsMissing = append(versionsMissing, versionToCheckFor) - continue - } - for _, pkgversion := range versionsInPyPy { - pv, err := pep440.Parse(pkgversion) - if err != nil { - continue - } - if pv.Equal(vc) { - versionFound = true - break - } - } - if versionFound { - continue - } - versionsMissing = append(versionsMissing, versionToCheckFor) - } - if len(versionsMissing) > 0 { - return &MissingVersionsError{Package: pkg, Ecosystem: "PyPI", Missing: versionsMissing, Known: versionsInPyPy} - } - - return nil -} - -// Validate the existence of a package in Go. -func existsInGo(pkg string) bool { - // Of course the Go runtime exists :-) - if pkg == "stdlib" || pkg == "toolchain" { - return true - } - - // The Go Module Proxy seems to require package names to be lowercase. - // GitHub URLs are known to be case-insensitive. - if strings.HasPrefix(pkg, "github.com/") { - pkg = strings.ToLower(pkg) - } - - packageInstanceURL := fmt.Sprintf("https://proxy.golang.org/%s/@v/list", pkg) - - // This 404's for non-existent packages. - resp, err := faulttolerant.Head(packageInstanceURL) - if err != nil { - return false - } - return resp.StatusCode == http.StatusOK -} - -// Confirm that all specified versions of a package exist in Go. -func versionsExistInGo(pkg string, versions []string) error { - if pkg == "stdlib" || pkg == "toolchain" { - return goVersionsExist(versions) - } - - // The Go Module Proxy seems to require package names to be lowercase. - // GitHub URLs are known to be case-insensitive. - if strings.HasPrefix(pkg, "github.com/") { - pkg = strings.ToLower(pkg) - } - - packageInstanceURL := fmt.Sprintf("https://proxy.golang.org/%s/@v/list", pkg) - - // This 404's for non-existent packages. - resp, err := faulttolerant.Get(packageInstanceURL) - if err != nil { - return fmt.Errorf("unable to validate package: %v", err) - } - defer resp.Body.Close() - if resp.StatusCode != http.StatusOK { - return fmt.Errorf("unable to validate package: %q for %s", resp.Status, packageInstanceURL) - } - - // Load the known versions from the list provided. - respBytes, err := io.ReadAll(resp.Body) - if err != nil { - return fmt.Errorf("unable to retrieve versions for for %q: %v", pkg, err) - } - // Fetch all known versions of package. - versionsInGo := strings.Split(strings.TrimSpace(string(respBytes)), "\n") - // It seems that an empty version set is plausible. Unreleased? - // e.g. github.com/nanobox-io/golang-nanoauth - if len(versionsInGo[0]) == 0 { - versionsInGo = []string{} - } - if len(versionsInGo) == 0 { - // TODO: This is warning-level worthy if warnings were a thing... - return nil - } - - // Determine which referenced versions are missing. - versionsMissing := []string{} - for _, versionToCheckFor := range versions { - // First, detect pseudo-version and skip it. - if module.IsPseudoVersion("v" + versionToCheckFor) { - // TODO: Try mapping the pseudo-version to a base version and - // checking for that instead of skipping. - continue - } - if slices.Contains(versionsInGo, semver.Canonical("v"+versionToCheckFor)) { - continue - } - versionsMissing = append(versionsMissing, versionToCheckFor) - } - if len(versionsMissing) > 0 { - return &MissingVersionsError{Package: pkg, Ecosystem: "Go", Missing: versionsMissing, Known: versionsInGo} - } - - return nil -} - -// Confirm that all specified versions of Go exist. -func goVersionsExist(versions []string) error { - URL := "https://go.dev/dl/?mode=json&include=all" - - resp, err := faulttolerant.Get(URL) - if err != nil { - return fmt.Errorf("unable to validate Go versions: %v", err) - } - defer resp.Body.Close() - if resp.StatusCode != http.StatusOK { - return fmt.Errorf("unable to validate package: %q for %s", resp.Status, URL) - } - - // Fetch all known versions of Go. - // Parse the known versions from the JSON. - respJSON, err := io.ReadAll(resp.Body) - if err != nil { - return fmt.Errorf("unable to retrieve JSON for Go: %v", err) - } - // Fetch all known versions of package. - goVersions := []string{} - releases := gjson.GetBytes(respJSON, "#.version") - releases.ForEach(func(key, value gjson.Result) bool { - goVersions = append(goVersions, value.String()) - return true // keep iterating. - }) - - // Determine which referenced versions are missing. - versionsMissing := []string{} - for _, versionToCheckFor := range versions { - if slices.Contains(goVersions, "go"+versionToCheckFor) { - continue - } - if semver.Prerelease("v"+versionToCheckFor) == "-0" { - // Coerce "1.16.0-0" to "1.16". - if slices.Contains(goVersions, "go"+strings.TrimPrefix(semver.MajorMinor("v"+versionToCheckFor), "v")) { - continue - } - // Coerce "1.21.0-0" to "1.21.0". - if slices.Contains(goVersions, "go"+strings.TrimPrefix(strings.TrimSuffix("v"+versionToCheckFor, semver.Prerelease("v"+versionToCheckFor)), "v")) { - continue - } - } - versionsMissing = append(versionsMissing, versionToCheckFor) - } - if len(versionsMissing) > 0 { - return fmt.Errorf("failed to find %+v for Go in %+v", versionsMissing, goVersions) - } - - return nil -} diff --git a/tools/osv-linter/internal/pkgchecker/package_check.go b/tools/osv-linter/internal/pkgchecker/package_check.go new file mode 100644 index 0000000..5d21369 --- /dev/null +++ b/tools/osv-linter/internal/pkgchecker/package_check.go @@ -0,0 +1,100 @@ +package pkgchecker + +import ( + "fmt" + "net/http" + "strings" + + "github.com/ossf/osv-schema/linter/internal/faulttolerant" +) + +// Validate the existence of a package in crates.io. +func existsInCrates(pkg string) bool { + // Handle special case for rust standard library + if pkg == "std" { + return true + } + + packageInstanceURL := fmt.Sprintf("%s/%s", EcosystemBaseURLs["crates.io"], pkg) + + return checkPackageExists(packageInstanceURL) +} + +// Validate the existence of a package in npm. +func existsInNpm(pkg string) bool { + packageInstanceURL := fmt.Sprintf("%s/%s", EcosystemBaseURLs["npm"], pkg) + + return checkPackageExists(packageInstanceURL) +} + +// Validate the existence of a package in NuGet. +func existsInNuget(pkg string) bool { + packageInstanceURL := fmt.Sprintf("%s/%s/index.json", EcosystemBaseURLs["NuGet"], pkg) + + return checkPackageExists(packageInstanceURL) +} + +// Validate the existence of a package in RubyGems. +func existsInRubyGems(pkg string) bool { + packageInstanceURL := fmt.Sprintf("%s/%s.json", EcosystemBaseURLs["RubyGems"], pkg) + + return checkPackageExists(packageInstanceURL) +} + +// Validate the existence of a package in Packagist. +func existsInPackagist(pkg string) bool { + packageInstanceURL := fmt.Sprintf("%s/%s.json", EcosystemBaseURLs["Packagist"], pkg) + + return checkPackageExists(packageInstanceURL) +} + +// Validate the existence of a package in Pub. +func existsInPub(pkg string) bool { + packageInstanceURL := fmt.Sprintf("%s/%s", EcosystemBaseURLs["Pub"], pkg) + + return checkPackageExists(packageInstanceURL) +} + +// Validate the existence of a package in Hackage. +func existsInHackage(pkg string) bool { + packageInstanceURL := fmt.Sprintf("%s/%s", EcosystemBaseURLs["Hackage"], pkg) + + return checkPackageExists(packageInstanceURL) +} + +// Validate the existence of a package in PyPI. +// Note: for malicious packages, if the package has been removed, the verify will be fail +func existsInPyPI(pkg string) bool { + packageInstanceURL := fmt.Sprintf("%s/%s/json", EcosystemBaseURLs["PyPI"], strings.ToLower(pkg)) + + return checkPackageExists(packageInstanceURL) +} + +// Validate the existence of a package in Go. +func existsInGo(pkg string) bool { + // Of course the Go runtime exists :-) + if pkg == "stdlib" || pkg == "toolchain" { + return true + } + + // The Go Module Proxy seems to require package names to be lowercase. + // GitHub URLs are known to be case-insensitive. + if strings.HasPrefix(pkg, "github.com/") { + pkg = strings.ToLower(pkg) + } + + packageInstanceURL := fmt.Sprintf("%s/%s/@v/list", EcosystemBaseURLs["Go"], pkg) + + return checkPackageExists(packageInstanceURL) +} + +// Makes an HTTP GET request to check package existance, with fault tolerance. +func checkPackageExists(packageInstanceURL string) bool { + // This 404's for non-existent packages. + resp, err := faulttolerant.Head(packageInstanceURL) + if err != nil { + return false + } + + return resp.StatusCode == http.StatusOK +} diff --git a/tools/osv-linter/internal/pkgchecker/package_check_test.go b/tools/osv-linter/internal/pkgchecker/package_check_test.go new file mode 100644 index 0000000..01fdebf --- /dev/null +++ b/tools/osv-linter/internal/pkgchecker/package_check_test.go @@ -0,0 +1,260 @@ +package pkgchecker + +import ( + "testing" +) + +func Test_existsInCrates(t *testing.T) { + tests := []struct { + name string + pkg string + want bool + }{ + { + name: "existing package", + pkg: "surrealdb-core", + want: true, + }, + { + name: "non-existing package", + pkg: "non-existing-package", + want: false, + }, + { + name: "rust standard library", + pkg: "std", + want: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := existsInCrates(tt.pkg); got != tt.want { + t.Errorf("existsInCrates() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_existsInNpm(t *testing.T) { + tests := []struct { + name string + pkg string + want bool + }{ + { + name: "existing package", + pkg: "proxyapi-docs", + want: true, + }, + { + name: "existing package with a special name", + pkg: "@saferpay/components", + want: true, + }, + { + name: "non-existent package", + pkg: "non-existing-package", + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := existsInNpm(tt.pkg); got != tt.want { + t.Errorf("existsInNpm() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_existsInNuget(t *testing.T) { + tests := []struct { + name string + pkg string + want bool + }{ + { + name: "existing package", + pkg: "Newtonsoft.Json", + want: true, + }, + { + name: "non-existing package", + pkg: "non-existing-package", + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := existsInNuget(tt.pkg); got != tt.want { + t.Errorf("existsInNuget() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_existsInRubyGems(t *testing.T) { + tests := []struct { + name string + pkg string + want bool + }{ + { + name: "existing package", + pkg: "rails-html-sanitizer", + want: true, + }, + { + name: "non-existing package", + pkg: "non-existing-package", + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := existsInRubyGems(tt.pkg); got != tt.want { + t.Errorf("existsInRubyGems() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_existsInPackagist(t *testing.T) { + tests := []struct { + name string + pkg string + want bool + }{ + { + name: "existing package", + pkg: "drupal/core", + want: true, + }, + { + name: "non-existing package", + pkg: "non-existing-package", + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := existsInPackagist(tt.pkg); got != tt.want { + t.Errorf("existsInPackagist() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_existsInPub(t *testing.T) { + tests := []struct { + name string + pkg string + want bool + }{ + { + name: "existing package", + pkg: "serverpod_client", + want: true, + }, + { + name: "non-existing package", + pkg: "non-existing-package", + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := existsInPub(tt.pkg); got != tt.want { + t.Errorf("existsInPub() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_existsInHackage(t *testing.T) { + tests := []struct { + name string + pkg string + want bool + }{ + { + name: "existing package", + pkg: "git-annex", + want: true, + }, + { + name: "non-existing package", + pkg: "non-existing-package", + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := existsInHackage(tt.pkg); got != tt.want { + t.Errorf("existsInHackage() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_existsInPyPI(t *testing.T) { + tests := []struct { + name string + pkg string + want bool + }{ + { + name: "existing package", + pkg: "python-libarchive", + want: true, + }, + { + name: "non-existing package", + pkg: "non-existing-package", + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := existsInPyPI(tt.pkg); got != tt.want { + t.Errorf("existsInPyPI() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_existsInGo(t *testing.T) { + tests := []struct { + name string + pkg string + want bool + }{ + { + name: "existing package", + pkg: "cosmossdk.io/math", + want: true, + }, + { + name: "stdlib", + pkg: "stdlib", + want: true, + }, + { + name: "github package", + pkg: "github.com/mattermost/mattermost/server/v8", + want: true, + }, + { + name: "non-existing package", + pkg: "non-existing-package", + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := existsInGo(tt.pkg); got != tt.want { + t.Errorf("existsInGo() = %v, want %v", got, tt.want) + } + + }) + } +} diff --git a/tools/osv-linter/internal/pkgchecker/version_check.go b/tools/osv-linter/internal/pkgchecker/version_check.go new file mode 100644 index 0000000..53e39bc --- /dev/null +++ b/tools/osv-linter/internal/pkgchecker/version_check.go @@ -0,0 +1,190 @@ +package pkgchecker + +import ( + "fmt" + "io" + "net/http" + "regexp" + "slices" + "strings" + + pep440 "github.com/aquasecurity/go-pep440-version" + "github.com/ossf/osv-schema/linter/internal/faulttolerant" + "github.com/tidwall/gjson" + "golang.org/x/mod/module" + "golang.org/x/mod/semver" +) + +// Confirm that all specified versions of a package exist in PyPI. +func versionsExistInPyPI(pkg string, versions []string) error { + // https://packaging.python.org/en/latest/specifications/name-normalization/ + pythonNormalizationRegex := regexp.MustCompile(`[-_.]+`) + pkgNormalized := strings.ToLower(pythonNormalizationRegex.ReplaceAllString(pkg, "-")) + packageInstanceURL := fmt.Sprintf("%s/%s/json", EcosystemBaseURLs["PyPI"], pkgNormalized) + + // This 404's for non-existent packages. + resp, err := faulttolerant.Get(packageInstanceURL) + if err != nil { + return fmt.Errorf("unable to validate package: %v", err) + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("unable to validate package: %q for %s", resp.Status, packageInstanceURL) + } + + // Parse the known versions from the JSON. + respJSON, err := io.ReadAll(resp.Body) + if err != nil { + return fmt.Errorf("unable to retrieve JSON for %q: %v", pkg, err) + } + // Fetch all known versions of package. + versionsInPyPy := []string{} + releases := gjson.GetBytes(respJSON, "releases.@keys") + releases.ForEach(func(key, value gjson.Result) bool { + versionsInPyPy = append(versionsInPyPy, value.String()) + return true // keep iterating. + }) + // Determine which referenced versions are missing. + versionsMissing := []string{} + for _, versionToCheckFor := range versions { + versionFound := false + vc, err := pep440.Parse(versionToCheckFor) + if err != nil { + versionsMissing = append(versionsMissing, versionToCheckFor) + continue + } + for _, pkgversion := range versionsInPyPy { + pv, err := pep440.Parse(pkgversion) + if err != nil { + continue + } + if pv.Equal(vc) { + versionFound = true + break + } + } + if versionFound { + continue + } + versionsMissing = append(versionsMissing, versionToCheckFor) + } + if len(versionsMissing) > 0 { + return &MissingVersionsError{Package: pkg, Ecosystem: "PyPI", Missing: versionsMissing, Known: versionsInPyPy} + } + + return nil +} + +// Confirm that all specified versions of a package exist in Go. +func versionsExistInGo(pkg string, versions []string) error { + if pkg == "stdlib" || pkg == "toolchain" { + return goVersionsExist(versions) + } + + // The Go Module Proxy seems to require package names to be lowercase. + // GitHub URLs are known to be case-insensitive. + if strings.HasPrefix(pkg, "github.com/") { + pkg = strings.ToLower(pkg) + } + + packageInstanceURL := fmt.Sprintf("%s/%s/@v/list", EcosystemBaseURLs["Go"], pkg) + + // This 404's for non-existent packages. + resp, err := faulttolerant.Get(packageInstanceURL) + if err != nil { + return fmt.Errorf("unable to validate package: %v", err) + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("unable to validate package: %q for %s", resp.Status, packageInstanceURL) + } + + // Load the known versions from the list provided. + respBytes, err := io.ReadAll(resp.Body) + if err != nil { + return fmt.Errorf("unable to retrieve versions for for %q: %v", pkg, err) + } + // Fetch all known versions of package. + versionsInGo := strings.Split(strings.TrimSpace(string(respBytes)), "\n") + // It seems that an empty version set is plausible. Unreleased? + // e.g. github.com/nanobox-io/golang-nanoauth + if len(versionsInGo[0]) == 0 { + versionsInGo = []string{} + } + if len(versionsInGo) == 0 { + // TODO: This is warning-level worthy if warnings were a thing... + return nil + } + + // Determine which referenced versions are missing. + versionsMissing := []string{} + for _, versionToCheckFor := range versions { + // First, detect pseudo-version and skip it. + if module.IsPseudoVersion("v" + versionToCheckFor) { + // TODO: Try mapping the pseudo-version to a base version and + // checking for that instead of skipping. + continue + } + if slices.Contains(versionsInGo, semver.Canonical("v"+versionToCheckFor)) { + continue + } + versionsMissing = append(versionsMissing, versionToCheckFor) + } + if len(versionsMissing) > 0 { + return &MissingVersionsError{Package: pkg, Ecosystem: "Go", Missing: versionsMissing, Known: versionsInGo} + } + + return nil +} + +// Confirm that all specified versions of Go exist. +func goVersionsExist(versions []string) error { + URL := "https://go.dev/dl/?mode=json&include=all" + + resp, err := faulttolerant.Get(URL) + if err != nil { + return fmt.Errorf("unable to validate Go versions: %v", err) + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("unable to validate package: %q for %s", resp.Status, URL) + } + + // Fetch all known versions of Go. + // Parse the known versions from the JSON. + respJSON, err := io.ReadAll(resp.Body) + if err != nil { + return fmt.Errorf("unable to retrieve JSON for Go: %v", err) + } + // Fetch all known versions of package. + goVersions := []string{} + releases := gjson.GetBytes(respJSON, "#.version") + releases.ForEach(func(key, value gjson.Result) bool { + goVersions = append(goVersions, value.String()) + return true // keep iterating. + }) + + // Determine which referenced versions are missing. + versionsMissing := []string{} + for _, versionToCheckFor := range versions { + if slices.Contains(goVersions, "go"+versionToCheckFor) { + continue + } + if semver.Prerelease("v"+versionToCheckFor) == "-0" { + // Coerce "1.16.0-0" to "1.16". + if slices.Contains(goVersions, "go"+strings.TrimPrefix(semver.MajorMinor("v"+versionToCheckFor), "v")) { + continue + } + // Coerce "1.21.0-0" to "1.21.0". + if slices.Contains(goVersions, "go"+strings.TrimPrefix(strings.TrimSuffix("v"+versionToCheckFor, semver.Prerelease("v"+versionToCheckFor)), "v")) { + continue + } + } + versionsMissing = append(versionsMissing, versionToCheckFor) + } + if len(versionsMissing) > 0 { + return fmt.Errorf("failed to find %+v for Go in %+v", versionsMissing, goVersions) + } + + return nil +} From 16648723d7b3cbfa84eca81a7906511fb643fab2 Mon Sep 17 00:00:00 2001 From: Holly Gong Date: Tue, 17 Dec 2024 17:12:10 +1100 Subject: [PATCH 2/4] add maven support Signed-off-by: Holly Gong --- .../internal/pkgchecker/ecosystems.go | 2 +- .../internal/pkgchecker/package_check.go | 20 +++++++++++++- .../internal/pkgchecker/package_check_test.go | 26 +++++++++++++++++++ 3 files changed, 46 insertions(+), 2 deletions(-) diff --git a/tools/osv-linter/internal/pkgchecker/ecosystems.go b/tools/osv-linter/internal/pkgchecker/ecosystems.go index 1740926..54a088a 100644 --- a/tools/osv-linter/internal/pkgchecker/ecosystems.go +++ b/tools/osv-linter/internal/pkgchecker/ecosystems.go @@ -66,7 +66,7 @@ func ExistsInEcosystem(pkg string, ecosystem string) bool { case "Linux": return true case "Maven": - return true + return existsInMaven(pkg) case "npm": return existsInNpm(pkg) case "NuGet": diff --git a/tools/osv-linter/internal/pkgchecker/package_check.go b/tools/osv-linter/internal/pkgchecker/package_check.go index 5d21369..4288e29 100644 --- a/tools/osv-linter/internal/pkgchecker/package_check.go +++ b/tools/osv-linter/internal/pkgchecker/package_check.go @@ -62,8 +62,26 @@ func existsInHackage(pkg string) bool { return checkPackageExists(packageInstanceURL) } +// Validate the existence of a package in Maven. +func existsInMaven(pkg string) bool { + if !strings.Contains(pkg, ":") { + return false + } + group_id := strings.Split(pkg, ":")[0] + artifact_id := strings.Split(pkg, ":")[1] + packageInstanceURL := fmt.Sprintf("%s/?q=g:%s%%20AND%%20a:%s", EcosystemBaseURLs["Maven"], group_id, artifact_id) + fmt.Println(packageInstanceURL) + + // Needs to use GET instead of HEAD for Maven + resp, err := faulttolerant.Get(packageInstanceURL) + if err != nil { + return false + } + + return resp.StatusCode == http.StatusOK +} + // Validate the existence of a package in PyPI. -// Note: for malicious packages, if the package has been removed, the verify will be fail func existsInPyPI(pkg string) bool { packageInstanceURL := fmt.Sprintf("%s/%s/json", EcosystemBaseURLs["PyPI"], strings.ToLower(pkg)) diff --git a/tools/osv-linter/internal/pkgchecker/package_check_test.go b/tools/osv-linter/internal/pkgchecker/package_check_test.go index 01fdebf..17a8b00 100644 --- a/tools/osv-linter/internal/pkgchecker/package_check_test.go +++ b/tools/osv-linter/internal/pkgchecker/package_check_test.go @@ -196,6 +196,32 @@ func Test_existsInHackage(t *testing.T) { } } +func Test_existsInMaven(t *testing.T) { + tests := []struct { + name string + pkg string + want bool + }{ + { + name: "existing package", + pkg: "de.gematik.refv.commons:commons", + want: true, + }, + { + name: "non-existing package", + pkg: "non-existing-package", + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := existsInMaven(tt.pkg); got != tt.want { + t.Errorf("existsInMaven() = %v, want %v", got, tt.want) + } + }) + } +} + func Test_existsInPyPI(t *testing.T) { tests := []struct { name string From 9447df1eb7e8fac56b14e6fb48d643a93b803007 Mon Sep 17 00:00:00 2001 From: Holly Gong Date: Tue, 17 Dec 2024 17:25:46 +1100 Subject: [PATCH 3/4] remove print statement Signed-off-by: Holly Gong --- tools/osv-linter/internal/pkgchecker/package_check.go | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/osv-linter/internal/pkgchecker/package_check.go b/tools/osv-linter/internal/pkgchecker/package_check.go index 4288e29..27b91fb 100644 --- a/tools/osv-linter/internal/pkgchecker/package_check.go +++ b/tools/osv-linter/internal/pkgchecker/package_check.go @@ -70,7 +70,6 @@ func existsInMaven(pkg string) bool { group_id := strings.Split(pkg, ":")[0] artifact_id := strings.Split(pkg, ":")[1] packageInstanceURL := fmt.Sprintf("%s/?q=g:%s%%20AND%%20a:%s", EcosystemBaseURLs["Maven"], group_id, artifact_id) - fmt.Println(packageInstanceURL) // Needs to use GET instead of HEAD for Maven resp, err := faulttolerant.Get(packageInstanceURL) From b91e4e331c1f373d5169c07532e3cbf0fa9125da Mon Sep 17 00:00:00 2001 From: Holly Gong Date: Thu, 19 Dec 2024 13:35:44 +1100 Subject: [PATCH 4/4] use deps.dev as primary check source Signed-off-by: Holly Gong --- .../internal/pkgchecker/package_check.go | 94 +++++++++++++------ .../internal/pkgchecker/package_check_test.go | 2 +- 2 files changed, 66 insertions(+), 30 deletions(-) diff --git a/tools/osv-linter/internal/pkgchecker/package_check.go b/tools/osv-linter/internal/pkgchecker/package_check.go index 27b91fb..13d2d03 100644 --- a/tools/osv-linter/internal/pkgchecker/package_check.go +++ b/tools/osv-linter/internal/pkgchecker/package_check.go @@ -15,21 +15,59 @@ func existsInCrates(pkg string) bool { return true } - packageInstanceURL := fmt.Sprintf("%s/%s", EcosystemBaseURLs["crates.io"], pkg) + ecosystem := "crates.io" + packageInstanceURL := fmt.Sprintf("%s/%s", EcosystemBaseURLs[ecosystem], pkg) + + if isPackageInDepsDev(ecosystem, pkg) { + return true + } + + return checkPackageExists(packageInstanceURL) +} + +// Validate the existence of a package in Go. +func existsInGo(pkg string) bool { + // Of course the Go runtime exists :-) + if pkg == "stdlib" || pkg == "toolchain" { + return true + } + + // The Go Module Proxy seems to require package names to be lowercase. + // GitHub URLs are known to be case-insensitive. + if strings.HasPrefix(pkg, "github.com/") { + pkg = strings.ToLower(pkg) + } + + ecosystem := "Go" + packageInstanceURL := fmt.Sprintf("%s/%s/@v/list", EcosystemBaseURLs[ecosystem], pkg) + + if isPackageInDepsDev(ecosystem, pkg) { + return true + } return checkPackageExists(packageInstanceURL) } // Validate the existence of a package in npm. func existsInNpm(pkg string) bool { - packageInstanceURL := fmt.Sprintf("%s/%s", EcosystemBaseURLs["npm"], pkg) + ecosystem := "npm" + packageInstanceURL := fmt.Sprintf("%s/%s", EcosystemBaseURLs[ecosystem], pkg) + + if isPackageInDepsDev(ecosystem, pkg) { + return true + } return checkPackageExists(packageInstanceURL) } // Validate the existence of a package in NuGet. func existsInNuget(pkg string) bool { - packageInstanceURL := fmt.Sprintf("%s/%s/index.json", EcosystemBaseURLs["NuGet"], pkg) + ecosystem := "NuGet" + packageInstanceURL := fmt.Sprintf("%s/%s/index.json", EcosystemBaseURLs[ecosystem], pkg) + + if isPackageInDepsDev(ecosystem, pkg) { + return true + } return checkPackageExists(packageInstanceURL) } @@ -48,6 +86,18 @@ func existsInPackagist(pkg string) bool { return checkPackageExists(packageInstanceURL) } +// Validate the existence of a package in PyPI. +func existsInPyPI(pkg string) bool { + ecosystem := "PyPI" + packageInstanceURL := fmt.Sprintf("%s/%s/json", EcosystemBaseURLs[ecosystem], strings.ToLower(pkg)) + + if isPackageInDepsDev(ecosystem, pkg) { + return true + } + + return checkPackageExists(packageInstanceURL) +} + // Validate the existence of a package in Pub. func existsInPub(pkg string) bool { packageInstanceURL := fmt.Sprintf("%s/%s", EcosystemBaseURLs["Pub"], pkg) @@ -69,7 +119,13 @@ func existsInMaven(pkg string) bool { } group_id := strings.Split(pkg, ":")[0] artifact_id := strings.Split(pkg, ":")[1] - packageInstanceURL := fmt.Sprintf("%s/?q=g:%s%%20AND%%20a:%s", EcosystemBaseURLs["Maven"], group_id, artifact_id) + + ecosystem := "Maven" + packageInstanceURL := fmt.Sprintf("%s/?q=g:%s%%20AND%%20a:%s", EcosystemBaseURLs[ecosystem], group_id, artifact_id) + + if isPackageInDepsDev(ecosystem, pkg) { + return true + } // Needs to use GET instead of HEAD for Maven resp, err := faulttolerant.Get(packageInstanceURL) @@ -80,31 +136,6 @@ func existsInMaven(pkg string) bool { return resp.StatusCode == http.StatusOK } -// Validate the existence of a package in PyPI. -func existsInPyPI(pkg string) bool { - packageInstanceURL := fmt.Sprintf("%s/%s/json", EcosystemBaseURLs["PyPI"], strings.ToLower(pkg)) - - return checkPackageExists(packageInstanceURL) -} - -// Validate the existence of a package in Go. -func existsInGo(pkg string) bool { - // Of course the Go runtime exists :-) - if pkg == "stdlib" || pkg == "toolchain" { - return true - } - - // The Go Module Proxy seems to require package names to be lowercase. - // GitHub URLs are known to be case-insensitive. - if strings.HasPrefix(pkg, "github.com/") { - pkg = strings.ToLower(pkg) - } - - packageInstanceURL := fmt.Sprintf("%s/%s/@v/list", EcosystemBaseURLs["Go"], pkg) - - return checkPackageExists(packageInstanceURL) -} - // Makes an HTTP GET request to check package existance, with fault tolerance. func checkPackageExists(packageInstanceURL string) bool { // This 404's for non-existent packages. @@ -115,3 +146,8 @@ func checkPackageExists(packageInstanceURL string) bool { return resp.StatusCode == http.StatusOK } + +func isPackageInDepsDev(ecosystem string, pkg string) bool { + url := fmt.Sprintf("https://api.deps.dev/v3/systems/%s/packages/%s", ecosystem, pkg) + return checkPackageExists(url) +} diff --git a/tools/osv-linter/internal/pkgchecker/package_check_test.go b/tools/osv-linter/internal/pkgchecker/package_check_test.go index 17a8b00..dad46c8 100644 --- a/tools/osv-linter/internal/pkgchecker/package_check_test.go +++ b/tools/osv-linter/internal/pkgchecker/package_check_test.go @@ -74,7 +74,7 @@ func Test_existsInNuget(t *testing.T) { }{ { name: "existing package", - pkg: "Newtonsoft.Json", + pkg: "System.Formats.Nrbf", want: true, }, {