Skip to content

Commit

Permalink
✨ Add GitHub git compatibility mode (#4474)
Browse files Browse the repository at this point in the history
* add git handler for GitHub repositories

This is primarily aimed at helping in cases where a repository's
.gitattributes file causes files to not be analyzed.

Signed-off-by: Spencer Schrock <[email protected]>

* use variadic options to configure GitHub repoclient

This will let us use the new entrypoint in a backwards compatible way,
similar to the scorecard.Run change made in the v5 release.

Signed-off-by: Spencer Schrock <[email protected]>

* add flag to enable github git mode

Signed-off-by: Spencer Schrock <[email protected]>

* rename flag to be forge agnostic

export-ignore is not a github specific feature, and other forges, like
gitlab, suffer from the same bug.

Signed-off-by: Spencer Schrock <[email protected]>

* move git file handler to internal package

This will allow sharing with GitLab in a followup PR

Signed-off-by: Spencer Schrock <[email protected]>

* add a test

Signed-off-by: Spencer Schrock <[email protected]>

* use new toplevel gitmode argument

also moves a func around for smaller PR diff.

Signed-off-by: Spencer Schrock <[email protected]>

* add path traversal test

Signed-off-by: Spencer Schrock <[email protected]>

* change flag to file-mode

Signed-off-by: Spencer Schrock <[email protected]>

* fix repo typo in options test

the value isn't used to connect to anything though.

Signed-off-by: Spencer Schrock <[email protected]>

---------

Signed-off-by: Spencer Schrock <[email protected]>
  • Loading branch information
spencerschrock authored Feb 12, 2025
1 parent 6fc296e commit b0143fc
Show file tree
Hide file tree
Showing 8 changed files with 474 additions and 23 deletions.
97 changes: 92 additions & 5 deletions clients/githubrepo/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"github.com/ossf/scorecard/v5/clients"
"github.com/ossf/scorecard/v5/clients/githubrepo/roundtripper"
sce "github.com/ossf/scorecard/v5/errors"
"github.com/ossf/scorecard/v5/internal/gitfile"
"github.com/ossf/scorecard/v5/log"
)

Expand All @@ -40,6 +41,8 @@ var (
errDefaultBranchEmpty = errors.New("default branch name is empty")
)

// Option configures optional behavior of the GitHub repo client.
// Options are applied to a repoClientConfig before the client is constructed;
// a non-nil error signals that the option could not be applied.
type Option func(*repoClientConfig) error

// Client is GitHub-specific implementation of RepoClient.
type Client struct {
repourl *Repo
Expand All @@ -57,9 +60,32 @@ type Client struct {
webhook *webhookHandler
languages *languagesHandler
licenses *licensesHandler
git *gitfile.Handler
ctx context.Context
tarball tarballHandler
commitDepth int
gitMode bool
}

// WithFileModeGit returns an Option that switches the repo client to git-based
// file access by setting gitMode on the config. The option itself never fails.
func WithFileModeGit() Option {
	enableGit := func(cfg *repoClientConfig) error {
		cfg.gitMode = true
		return nil
	}
	return enableGit
}

// WithRoundTripper returns an Option that stores rt as the http.RoundTripper
// the repo client should use. The option itself never fails.
func WithRoundTripper(rt http.RoundTripper) Option {
	useTransport := func(cfg *repoClientConfig) error {
		cfg.rt = rt
		return nil
	}
	return useTransport
}

// repoClientConfig collects the settings that Options modify before a repo
// client is built.
type repoClientConfig struct {
// rt is the HTTP transport for the client; set by WithRoundTripper.
rt http.RoundTripper
// gitMode selects git-based file access; set by WithFileModeGit.
gitMode bool
}

const defaultGhHost = "github.com"
Expand Down Expand Up @@ -88,8 +114,12 @@ func (client *Client) InitRepo(inputRepo clients.Repo, commitSHA string, commitD
commitSHA: commitSHA,
}

// Init tarballHandler.
client.tarball.init(client.ctx, client.repo, commitSHA)
if client.gitMode {
client.git.Init(client.ctx, client.repo.GetCloneURL(), commitSHA)
} else {
// Init tarballHandler.
client.tarball.init(client.ctx, client.repo, commitSHA)
}

// Setup GraphQL.
client.graphClient.init(client.ctx, client.repourl, client.commitDepth)
Expand Down Expand Up @@ -141,16 +171,37 @@ func (client *Client) URI() string {

// LocalPath implements RepoClient.LocalPath.
// The path comes from the git handler when git mode is enabled and from the
// tarball handler otherwise.
func (client *Client) LocalPath() (string, error) {
	if !client.gitMode {
		return client.tarball.getLocalPath()
	}
	dir, err := client.git.GetLocalPath()
	if err != nil {
		return "", fmt.Errorf("git local path: %w", err)
	}
	return dir, nil
}

// ListFiles implements RepoClient.ListFiles.
// The predicate is forwarded to the git handler when git mode is enabled and
// to the tarball handler otherwise.
func (client *Client) ListFiles(predicate func(string) (bool, error)) ([]string, error) {
	if !client.gitMode {
		return client.tarball.listFiles(predicate)
	}
	matched, err := client.git.ListFiles(predicate)
	if err != nil {
		return nil, fmt.Errorf("git listfiles: %w", err)
	}
	return matched, nil
}

// GetFileReader implements RepoClient.GetFileReader.
// The file is opened through the git handler when git mode is enabled and
// through the tarball handler otherwise.
func (client *Client) GetFileReader(filename string) (io.ReadCloser, error) {
	if !client.gitMode {
		return client.tarball.getFile(filename)
	}
	file, err := client.git.GetFile(filename)
	if err != nil {
		// Return a literal nil so the interface value is truly nil on failure.
		return nil, fmt.Errorf("git getfile: %w", err)
	}
	return file, nil
}

Expand Down Expand Up @@ -210,7 +261,14 @@ func (client *Client) GetOrgRepoClient(ctx context.Context) (clients.RepoClient,
return nil, fmt.Errorf("error during MakeGithubRepo: %w", err)
}

c := CreateGithubRepoClientWithTransport(ctx, client.repoClient.Client().Transport)
options := []Option{WithRoundTripper(client.repoClient.Client().Transport)}
if client.gitMode {
options = append(options, WithFileModeGit())
}
c, err := NewRepoClient(ctx, options...)
if err != nil {
return nil, fmt.Errorf("create org repoclient: %w", err)
}
if err := c.InitRepo(dotGithubRepo, clients.HeadSHA, 0); err != nil {
return nil, fmt.Errorf("error during InitRepo: %w", err)
}
Expand Down Expand Up @@ -260,13 +318,40 @@ func (client *Client) SearchCommits(request clients.SearchCommitsOptions) ([]cli

// Close implements RepoClient.Close.
// It cleans up whichever file handler is active: the git handler in git mode,
// the tarball handler otherwise.
func (client *Client) Close() error {
	if !client.gitMode {
		return client.tarball.cleanup()
	}
	err := client.git.Cleanup()
	if err != nil {
		return fmt.Errorf("git cleanup: %w", err)
	}
	return nil
}

// CreateGithubRepoClientWithTransport returns a Client which implements the
// RepoClient interface. It is a thin wrapper that calls NewRepoClient with the
// WithRoundTripper(rt) option and discards the returned error, because this
// function's signature has no error result.
func CreateGithubRepoClientWithTransport(ctx context.Context, rt http.RoundTripper) clients.RepoClient {
	//nolint:errcheck // need to suppress because this method doesn't return an error
	repoClient, _ := NewRepoClient(ctx, WithRoundTripper(rt))
	return repoClient
}

// NewRepoClient returns a Client which implements RepoClient interface.
// It can be configured with various [Option]s.
func NewRepoClient(ctx context.Context, opts ...Option) (clients.RepoClient, error) {
var config repoClientConfig

for _, option := range opts {
if err := option(&config); err != nil {
return nil, err
}
}

if config.rt == nil {
logger := log.NewLogger(log.DefaultLevel)
config.rt = roundtripper.NewTransport(ctx, logger)
}

httpClient := &http.Client{
Transport: rt,
Transport: config.rt,
}

var client *github.Client
Expand Down Expand Up @@ -333,7 +418,9 @@ func CreateGithubRepoClientWithTransport(ctx context.Context, rt http.RoundTripp
tarball: tarballHandler{
httpClient: httpClient,
},
}
gitMode: config.gitMode,
git: &gitfile.Handler{},
}, nil
}

// CreateGithubRepoClient returns a Client which implements RepoClient interface.
Expand Down
9 changes: 7 additions & 2 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,13 +147,18 @@ func rootCmd(o *options.Options) error {
}
}

repoResult, err = scorecard.Run(ctx, repo,
opts := []scorecard.Option{
scorecard.WithLogLevel(sclog.ParseLevel(o.LogLevel)),
scorecard.WithCommitSHA(o.Commit),
scorecard.WithCommitDepth(o.CommitDepth),
scorecard.WithProbes(enabledProbes),
scorecard.WithChecks(checks),
)
}
if strings.EqualFold(o.FileMode, options.FileModeGit) {
opts = append(opts, scorecard.WithFileModeGit())
}

repoResult, err = scorecard.Run(ctx, repo, opts...)
if err != nil {
return fmt.Errorf("scorecard.Run: %w", err)
}
Expand Down
160 changes: 160 additions & 0 deletions internal/gitfile/gitfile.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
// Copyright 2025 OpenSSF Scorecard Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package gitfile defines functionality to list and fetch files after temporarily cloning a git repo.
package gitfile

import (
"context"
"errors"
"fmt"
"os"
"path/filepath"
"strings"
"sync"

"github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/plumbing"
"github.com/go-git/go-git/v5/plumbing/object"

"github.com/ossf/scorecard/v5/clients"
)

// errPathTraversal is returned by GetFile when the requested path resolves
// outside the temporary clone directory.
var errPathTraversal = errors.New("requested file outside repo")

// repoDir is the name pattern passed to os.MkdirTemp when creating the
// temporary directory that holds the clone.
const repoDir = "repo*"

// Handler lists and fetches files from a git repository that is cloned into a
// temporary directory on first use. Call Init before any other method and
// Cleanup when done.
type Handler struct {
// errSetup holds the outcome of the one-time setup and is returned by every
// subsequent setup call.
errSetup error
// ctx is the context supplied to Init.
ctx context.Context
// once guards setup so the clone happens at most once per Init.
once *sync.Once
// cloneURL is the URL the repository is cloned from.
cloneURL string
// gitRepo is the repository handle produced by the clone.
gitRepo *git.Repository
// tempDir is the temporary directory containing the clone; Cleanup removes it.
tempDir string
// commitSHA is the commit to check out after cloning; clients.HeadSHA means
// the cloned HEAD is used as-is.
commitSHA string
}

// Init records the clone parameters and re-arms the lazy setup so the next
// file operation performs a fresh clone. No network or disk work happens here.
func (h *Handler) Init(ctx context.Context, cloneURL, commitSHA string) {
	// Store the inputs for the deferred clone.
	h.ctx = ctx
	h.cloneURL = cloneURL
	h.commitSHA = commitSHA

	// Reset the one-time setup state.
	h.once = new(sync.Once)
	h.errSetup = nil
}

// setup clones the repository on first use and remembers the outcome.
// Every call after the first returns the same error (or nil) without
// repeating the work, until Init is called again.
func (h *Handler) setup() error {
	h.once.Do(func() {
		h.errSetup = h.cloneAndCheckout()
	})
	return h.errSetup
}

// cloneAndCheckout creates the temporary directory, clones cloneURL into it,
// and, when a specific commit was requested, checks that commit out.
func (h *Handler) cloneAndCheckout() error {
	tempDir, err := os.MkdirTemp("", repoDir)
	if err != nil {
		return fmt.Errorf("create temp dir: %w", err)
	}
	// Record the directory before cloning so Cleanup removes it even if the
	// clone below fails.
	h.tempDir = tempDir

	// Honor the caller's context so cancellation and deadlines abort the clone.
	ctx := h.ctx
	if ctx == nil {
		ctx = context.Background()
	}

	wantHead := h.commitSHA == clients.HeadSHA
	opts := &git.CloneOptions{
		URL: h.cloneURL,
		// TODO: auth may be required for private repos
		SingleBranch: true,
	}
	if wantHead {
		// Only files are needed, not history, so a shallow clone suffices.
		// A depth-1 clone contains only the tip commit, so it cannot be used
		// when an older commit must be checked out afterwards.
		opts.Depth = 1
	}

	h.gitRepo, err = git.PlainCloneContext(ctx, h.tempDir, false, opts)
	if err != nil {
		return fmt.Errorf("clone repo: %w", err)
	}
	if wantHead {
		return nil
	}

	// assume the commit SHA is reachable from the default branch
	// this isn't as flexible as the tarball handler, but good enough for now
	wt, err := h.gitRepo.Worktree()
	if err != nil {
		return fmt.Errorf("get worktree: %w", err)
	}
	if err := wt.Checkout(&git.CheckoutOptions{Hash: plumbing.NewHash(h.commitSHA)}); err != nil {
		return fmt.Errorf("checkout specified commit: %w", err)
	}
	return nil
}

// GetLocalPath returns the temporary directory containing the clone,
// triggering the one-time setup first. It returns an error if setup failed.
func (h *Handler) GetLocalPath() (string, error) {
	err := h.setup()
	if err != nil {
		return "", fmt.Errorf("setup: %w", err)
	}
	return h.tempDir, nil
}

// ListFiles returns the names of the files in the tree of the repository's
// HEAD commit for which predicate reports true. It stops and returns an error
// if setup fails, if HEAD, its commit, or its tree cannot be resolved, or if
// predicate returns an error.
func (h *Handler) ListFiles(predicate func(string) (bool, error)) ([]string, error) {
	if err := h.setup(); err != nil {
		return nil, fmt.Errorf("setup: %w", err)
	}

	head, err := h.gitRepo.Head()
	if err != nil {
		return nil, fmt.Errorf("git.Head: %w", err)
	}
	headCommit, err := h.gitRepo.CommitObject(head.Hash())
	if err != nil {
		return nil, fmt.Errorf("git.CommitObject: %w", err)
	}
	root, err := headCommit.Tree()
	if err != nil {
		return nil, fmt.Errorf("git.Commit.Tree: %w", err)
	}

	var matched []string
	// collect appends the entry's name when the predicate accepts it.
	collect := func(entry *object.File) error {
		keep, predErr := predicate(entry.Name)
		if predErr != nil {
			return fmt.Errorf("error applying predicate to file %s: %w", entry.Name, predErr)
		}
		if keep {
			matched = append(matched, entry.Name)
		}
		return nil
	}
	if err := root.Files().ForEach(collect); err != nil {
		return nil, fmt.Errorf("git.Tree.Files: %w", err)
	}
	return matched, nil
}

// GetFile opens filename, interpreted relative to the clone directory, after
// triggering the one-time setup. It returns errPathTraversal when the joined
// path does not stay beneath the clone directory.
func (h *Handler) GetFile(filename string) (*os.File, error) {
	if err := h.setup(); err != nil {
		return nil, fmt.Errorf("setup: %w", err)
	}

	// check for path traversal: the cleaned, joined path must sit strictly
	// under the clone directory.
	// NOTE(review): this check is lexical only; os.Open follows symlinks, so
	// confirm whether symlinks inside the clone need separate handling.
	root := filepath.Clean(h.tempDir) + string(os.PathSeparator)
	target := filepath.Join(h.tempDir, filename)
	if !strings.HasPrefix(target, root) {
		return nil, errPathTraversal
	}

	file, err := os.Open(target)
	if err != nil {
		return nil, fmt.Errorf("open file: %w", err)
	}
	return file, nil
}

// Cleanup removes the temporary clone directory and everything in it.
// A "does not exist" failure is treated as success; any other removal error
// is returned wrapped.
func (h *Handler) Cleanup() error {
	err := os.RemoveAll(h.tempDir)
	if err == nil || os.IsNotExist(err) {
		return nil
	}
	return fmt.Errorf("os.Remove: %w", err)
}
Loading

0 comments on commit b0143fc

Please sign in to comment.