Skip to content

Commit

Permalink
shim: add support for containerd v2 metrics
Browse files Browse the repository at this point in the history
Add support for containerd v2 metrics in the shim. The v2 metrics are only used when runsc is run with --systemd-cgroup=true.
Containerd requires v2 metrics when the host runs cgroups v2.
This issue was noticed when attempting to gather metrics on AL2023, which defaults to cgroups v2.

Fixes: #11472
Signed-off-by: Champ-Goblem <[email protected]>
  • Loading branch information
Champ-Goblem committed Feb 13, 2025
1 parent dd8ea25 commit ec422c0
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 3 deletions.
2 changes: 2 additions & 0 deletions pkg/shim/runsc/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ go_library(
"@com_github_containerd_cgroups//:go_default_library",
"@com_github_containerd_cgroups//stats/v1:go_default_library",
"@com_github_containerd_cgroups//v2:go_default_library",
"@com_github_containerd_cgroups//v2/stats:go_default_library",
"@com_github_containerd_console//:go_default_library",
"@com_github_containerd_containerd//api/events:go_default_library",
"@com_github_containerd_containerd//api/types/task:go_default_library",
Expand All @@ -47,6 +48,7 @@ go_library(
"@com_github_containerd_errdefs//:go_default_library",
"@com_github_containerd_fifo//:go_default_library",
"@com_github_containerd_log//:go_default_library",
"@com_github_containerd_go_runc//:go_default_library",
"@com_github_containerd_typeurl//:go_default_library",
"@com_github_gogo_protobuf//types:go_default_library",
"@com_github_opencontainers_runtime_spec//specs-go:go_default_library",
Expand Down
55 changes: 52 additions & 3 deletions pkg/shim/runsc/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package runsc
import (
"context"
"fmt"
"github.com/containerd/go-runc"
"io"
"os"
"path/filepath"
Expand All @@ -29,6 +30,7 @@ import (
"github.com/containerd/cgroups"
cgroupsstats "github.com/containerd/cgroups/stats/v1"
cgroupsv2 "github.com/containerd/cgroups/v2"
cgroupsv2stats "github.com/containerd/cgroups/v2/stats"
"github.com/containerd/console"
"github.com/containerd/containerd/api/events"
"github.com/containerd/containerd/api/types/task"
Expand All @@ -49,7 +51,7 @@ import (
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/cleanup"
"gvisor.dev/gvisor/pkg/shim/runtimeoptions/v14"
v14 "gvisor.dev/gvisor/pkg/shim/runtimeoptions/v14"

"gvisor.dev/gvisor/pkg/shim/extension"
"gvisor.dev/gvisor/pkg/shim/proc"
Expand Down Expand Up @@ -660,6 +662,18 @@ func (s *runscService) Stats(ctx context.Context, r *taskAPI.StatsRequest) (*tas
// as runc.
//
// [0]: https://github.com/google/gvisor/blob/277a0d5a1fbe8272d4729c01ee4c6e374d047ebc/runsc/boot/events.go#L61-L81
return s.getStats(stats, r)
}

// getStats packages the stats reported by runsc into a StatsResponse,
// choosing the metrics format based on the shim's runsc configuration.
//
// When the "systemd-cgroup" entry of the runsc config is exactly "true" the
// cgroups v2 metrics format is produced; any other value selects the
// cgroups v1 format.
//
// NOTE(review): the flag is used here as a stand-in for "the host runs
// cgroups v2" — confirm this holds on hosts where the two can differ.
func (s *runscService) getStats(stats *runc.Stats, r *taskAPI.StatsRequest) (*taskAPI.StatsResponse, error) {
	if s.opts.RunscConfig["systemd-cgroup"] == "true" {
		return s.getV2Stats(stats, r)
	}
	return s.getV1Stats(stats, r)
}

func (s *runscService) getV1Stats(stats *runc.Stats, r *taskAPI.StatsRequest) (*taskAPI.StatsResponse, error) {
metrics := &cgroupsstats.Metrics{
CPU: &cgroupsstats.CPUStat{
Usage: &cgroupsstats.CPUUsage{
Expand Down Expand Up @@ -708,10 +722,45 @@ func (s *runscService) Stats(ctx context.Context, r *taskAPI.StatsRequest) (*tas
}
data, err := typeurl.MarshalAny(metrics)
if err != nil {
log.L.Debugf("Stats error, id: %s: %v", r.ID, err)
log.L.Debugf("Stats error v1, id: %s: %v", r.ID, err)
return nil, err
}
log.L.Debugf("Stats success v1, id: %s: %+v", r.ID, data)
return &taskAPI.StatsResponse{
Stats: data,
}, nil
}

func (s *runscService) getV2Stats(stats *runc.Stats, r *taskAPI.StatsRequest) (*taskAPI.StatsResponse, error) {
metrics := &cgroupsv2stats.Metrics{
// The CGroup V2 stats are in microseconds instead of nanoseconds so divide by 1000
CPU: &cgroupsv2stats.CPUStat{
UsageUsec: stats.Cpu.Usage.Total / 1000,
UserUsec: stats.Cpu.Usage.User / 1000,
SystemUsec: stats.Cpu.Usage.Kernel / 1000,
NrPeriods: stats.Cpu.Throttling.Periods,
NrThrottled: stats.Cpu.Throttling.ThrottledPeriods,
ThrottledUsec: stats.Cpu.Throttling.ThrottledTime / 1000,
},
Memory: &cgroupsv2stats.MemoryStat{
Usage: stats.Memory.Usage.Usage,
UsageLimit: stats.Memory.Usage.Limit,
SwapUsage: stats.Memory.Swap.Usage,
SwapLimit: stats.Memory.Swap.Limit,
Slab: stats.Memory.Kernel.Usage,
File: stats.Memory.Cache,
},
Pids: &cgroupsv2stats.PidsStat{
Current: stats.Pids.Current,
Limit: stats.Pids.Limit,
},
}
data, err := typeurl.MarshalAny(metrics)
if err != nil {
log.L.Debugf("Stats error v2, id: %s: %v", r.ID, err)
return nil, err
}
log.L.Debugf("Stats success, id: %s: %+v", r.ID, data)
log.L.Debugf("Stats success v2, id: %s: %+v", r.ID, data)
return &taskAPI.StatsResponse{
Stats: data,
}, nil
Expand Down

0 comments on commit ec422c0

Please sign in to comment.