Skip to content

Commit 636e001

Browse files
neonvm: apply code review fixes
Pass cpuScalingMode as an argument to the vm-runner; rename arguments, constants and functions here and there; drop unused code; move the default CPU scaling mode to a controller argument. Signed-off-by: Misha Sakhnov <[email protected]>
1 parent 82a4827 commit 636e001

File tree

10 files changed

+81
-103
lines changed

10 files changed

+81
-103
lines changed

neonvm-controller/cmd/main.go

+3-2
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ func main() {
9595
var concurrencyLimit int
9696
var skipUpdateValidationFor map[types.NamespacedName]struct{}
9797
var disableRunnerCgroup bool
98-
var useCpuSysfsStateScaling bool
98+
var defaultCpuScalingMode string
9999
var qemuDiskCacheSettings string
100100
var defaultMemoryProvider vmv1.MemoryProvider
101101
var memhpAutoMovableRatio string
@@ -133,7 +133,7 @@ func main() {
133133
return nil
134134
},
135135
)
136-
flag.BoolVar(&useCpuSysfsStateScaling, "use-cpu-sysfs-state-scaling", false, "Use sysfs cpu state scaling for CPU scaling")
136+
flag.StringVar(&defaultCpuScalingMode, "default-cpu-scaling-mode", vmv1.CpuScalingModeQMP, fmt.Sprintf("Default: CPU scaling: %s || %s", vmv1.CpuScalingModeQMP, vmv1.CpuScalingModeSysfs))
137137
flag.BoolVar(&disableRunnerCgroup, "disable-runner-cgroup", false, "Disable creation of a cgroup in neonvm-runner for fractional CPU limiting")
138138
flag.StringVar(&qemuDiskCacheSettings, "qemu-disk-cache-settings", "cache=none", "Set neonvm-runner's QEMU disk cache settings")
139139
flag.Func("default-memory-provider", "Set default memory provider to use for new VMs", defaultMemoryProvider.FlagFunc)
@@ -197,6 +197,7 @@ func main() {
197197
MemhpAutoMovableRatio: memhpAutoMovableRatio,
198198
FailurePendingPeriod: failurePendingPeriod,
199199
FailingRefreshInterval: failingRefreshInterval,
200+
DefaultCPUScalingMode: defaultCpuScalingMode,
200201
}
201202

202203
vmReconciler := &controllers.VMReconciler{

neonvm-daemon/cmd/main.go

-3
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,6 @@ func (s *cpuServer) handleGetCPUStatus(w http.ResponseWriter) {
6565
}
6666

6767
func (s *cpuServer) handleSetCPUStatus(w http.ResponseWriter, r *http.Request) {
68-
// TODO: should the call to this method be conditional, only if the statefs cpu scaling is enabled?
69-
// on the other hand, currently this endpoint is called by runner only if the statefs scaling is enabled
70-
// and it is a bit tricky to pass vmSpec here
7168
s.cpuOperationsMutex.Lock()
7269
defer s.cpuOperationsMutex.Unlock()
7370
body, err := io.ReadAll(r.Body)

neonvm-runner/cmd/main.go

+33-74
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ import (
2020
"os/signal"
2121
"path/filepath"
2222
"regexp"
23-
"strconv"
2423
"strings"
2524
"sync"
2625
"sync/atomic"
@@ -612,10 +611,10 @@ type Config struct {
612611
kernelPath string
613612
appendKernelCmdline string
614613
skipCgroupManagement bool
615-
enableDummyCPUServer bool
616614
diskCacheSettings string
617615
memoryProvider vmv1.MemoryProvider
618616
autoMovableRatio string
617+
cpuScalingMode string
619618
}
620619

621620
func newConfig(logger *zap.Logger) *Config {
@@ -625,10 +624,10 @@ func newConfig(logger *zap.Logger) *Config {
625624
kernelPath: defaultKernelPath,
626625
appendKernelCmdline: "",
627626
skipCgroupManagement: false,
628-
enableDummyCPUServer: false,
629627
diskCacheSettings: "cache=none",
630628
memoryProvider: "", // Require that this is explicitly set. We'll check later.
631629
autoMovableRatio: "", // Require that this is explicitly set IFF memoryProvider is VirtioMem. We'll check later.
630+
cpuScalingMode: "", // Require that this is explicitly set. We'll check later.
632631
}
633632
flag.StringVar(&cfg.vmSpecDump, "vmspec", cfg.vmSpecDump,
634633
"Base64 encoded VirtualMachine json specification")
@@ -641,14 +640,12 @@ func newConfig(logger *zap.Logger) *Config {
641640
flag.BoolVar(&cfg.skipCgroupManagement, "skip-cgroup-management",
642641
cfg.skipCgroupManagement,
643642
"Don't try to manage CPU")
644-
flag.BoolVar(&cfg.enableDummyCPUServer, "enable-dummy-cpu-server",
645-
cfg.skipCgroupManagement,
646-
"Use with -skip-cgroup-management. Provide a CPU server but don't actually do anything with it")
647643
flag.StringVar(&cfg.diskCacheSettings, "qemu-disk-cache-settings",
648644
cfg.diskCacheSettings, "Cache settings to add to -drive args for VM disks")
649645
flag.Func("memory-provider", "Set provider for memory hotplug", cfg.memoryProvider.FlagFunc)
650646
flag.StringVar(&cfg.autoMovableRatio, "memhp-auto-movable-ratio",
651647
cfg.autoMovableRatio, "Set value of kernel's memory_hotplug.auto_movable_ratio [virtio-mem only]")
648+
flag.StringVar(&cfg.cpuScalingMode, "cpu-scaling-mode", "", "Set CPU scaling mode")
652649
flag.Parse()
653650

654651
if cfg.memoryProvider == "" {
@@ -657,8 +654,8 @@ func newConfig(logger *zap.Logger) *Config {
657654
if cfg.memoryProvider == vmv1.MemoryProviderVirtioMem && cfg.autoMovableRatio == "" {
658655
logger.Fatal("missing required flag '-memhp-auto-movable-ratio'")
659656
}
660-
if cfg.enableDummyCPUServer && !cfg.skipCgroupManagement {
661-
logger.Fatal("flag -enable-dummy-cpu-server requires -skip-cgroup-management")
657+
if cfg.cpuScalingMode == "" {
658+
logger.Fatal("missing required flag '-cpu-scaling-mode'")
662659
}
663660

664661
return cfg
@@ -894,22 +891,26 @@ func buildQEMUCmd(
894891
maxCPUs := vmSpec.Guest.CPUs.Max.RoundedUp()
895892
minCPUs := vmSpec.Guest.CPUs.Min.RoundedUp()
896893

897-
if vmSpec.CpuScalingMode != nil && *vmSpec.CpuScalingMode == vmv1.CpuScalingModeCpuSysfsState {
898-
// if we use sysfs based scaling we specify start cpus equal to max cpus
894+
switch cfg.cpuScalingMode {
895+
case vmv1.CpuScalingModeSysfs:
899896
qemuCmd = append(qemuCmd, "-smp", fmt.Sprintf(
897+
// if we use sysfs based scaling we specify initial value for cpus qemu arg equal to max cpus
900898
"cpus=%d,maxcpus=%d,sockets=1,cores=%d,threads=1",
901899
maxCPUs,
902900
maxCPUs,
903901
maxCPUs,
904902
))
905-
} else {
906-
// if we use hotplug we specify start cpus equal to min cpus and scale using udev rules for cpu plug events
903+
case vmv1.CpuScalingModeQMP:
904+
// if we use hotplug we specify initial value for cpus qemu arg equal to min cpus and scale using udev rules for cpu plug events
907905
qemuCmd = append(qemuCmd, "-smp", fmt.Sprintf(
908906
"cpus=%d,maxcpus=%d,sockets=1,cores=%d,threads=1",
909907
minCPUs,
910908
maxCPUs,
911909
maxCPUs,
912910
))
911+
default:
912+
// we should never get here because we validate the flag in newConfig
913+
panic(fmt.Errorf("unknown CPU scaling mode %s", cfg.cpuScalingMode))
913914
}
914915

915916
// memory details
@@ -998,8 +999,8 @@ func makeKernelCmdline(cfg *Config, vmSpec *vmv1.VirtualMachineSpec, vmStatus *v
998999
if cfg.appendKernelCmdline != "" {
9991000
cmdlineParts = append(cmdlineParts, cfg.appendKernelCmdline)
10001001
}
1001-
if vmSpec.CpuScalingMode != nil && *vmSpec.CpuScalingMode == vmv1.CpuScalingModeCpuSysfsState {
1002-
// if we use sysfs based scaling we need to specify the start cpus as min CPUs
1002+
if cfg.cpuScalingMode == vmv1.CpuScalingModeSysfs {
1003+
// if we use sysfs based scaling we need to specify the start cpus as min CPUs to mark every CPU except 0 as offline
10031004
cmdlineParts = append(cmdlineParts, fmt.Sprintf("maxcpus=%d", vmSpec.Guest.CPUs.Min.RoundedUp()))
10041005
}
10051006

@@ -1048,44 +1049,28 @@ func runQEMU(
10481049
wg := sync.WaitGroup{}
10491050

10501051
wg.Add(1)
1051-
useCpuSysfsStateScaling := false
1052-
if vmSpec.CpuScalingMode != nil && *vmSpec.CpuScalingMode == vmv1.CpuScalingModeCpuSysfsState {
1053-
useCpuSysfsStateScaling = true
1054-
}
10551052
go terminateQemuOnSigterm(ctx, logger, &wg)
1056-
if !cfg.skipCgroupManagement || cfg.enableDummyCPUServer || useCpuSysfsStateScaling {
1053+
if !cfg.skipCgroupManagement || cfg.cpuScalingMode == vmv1.CpuScalingModeSysfs {
10571054
var callbacks cpuServerCallbacks
10581055

1059-
if cfg.enableDummyCPUServer {
1060-
lastValue := &atomic.Uint32{}
1061-
lastValue.Store(uint32(vmSpec.Guest.CPUs.Min))
1062-
1063-
callbacks = cpuServerCallbacks{
1064-
get: func(logger *zap.Logger) (*vmv1.MilliCPU, error) {
1065-
return lo.ToPtr(vmv1.MilliCPU(lastValue.Load())), nil
1066-
},
1067-
set: func(logger *zap.Logger, cpu vmv1.MilliCPU) error {
1068-
if useCpuSysfsStateScaling {
1069-
err := setNeonvmDaemonCPU(cpu)
1070-
if err != nil {
1071-
logger.Error("setting CPU through NeonVM Daemon failed", zap.Any("cpu", cpu), zap.Error(err))
1072-
return err
1073-
}
1056+
lastValue := &atomic.Uint32{}
1057+
lastValue.Store(uint32(vmSpec.Guest.CPUs.Min))
1058+
1059+
callbacks = cpuServerCallbacks{
1060+
get: func(logger *zap.Logger) (*vmv1.MilliCPU, error) {
1061+
return lo.ToPtr(vmv1.MilliCPU(lastValue.Load())), nil
1062+
},
1063+
set: func(logger *zap.Logger, cpu vmv1.MilliCPU) error {
1064+
if cfg.cpuScalingMode == vmv1.CpuScalingModeSysfs {
1065+
err := setNeonvmDaemonCPU(cpu)
1066+
if err != nil {
1067+
logger.Error("setting CPU through NeonVM Daemon failed", zap.Any("cpu", cpu), zap.Error(err))
1068+
return err
10741069
}
1075-
lastValue.Store(uint32(cpu))
1076-
return nil
1077-
},
1078-
}
1079-
} else {
1080-
// Standard implementation -- actually set the cgroup
1081-
callbacks = cpuServerCallbacks{
1082-
get: func(logger *zap.Logger) (*vmv1.MilliCPU, error) {
1083-
return getCgroupQuota(cgroupPath)
1084-
},
1085-
set: func(logger *zap.Logger, cpu vmv1.MilliCPU) error {
1086-
return setCgroupLimit(logger, cpu, cgroupPath)
1087-
},
1088-
}
1070+
}
1071+
lastValue.Store(uint32(cpu))
1072+
return nil
1073+
},
10891074
}
10901075

10911076
wg.Add(1)
@@ -1493,32 +1478,6 @@ func setCgroupLimit(logger *zap.Logger, r vmv1.MilliCPU, cgroupPath string) erro
14931478
return nil
14941479
}
14951480

1496-
func getCgroupQuota(cgroupPath string) (*vmv1.MilliCPU, error) {
1497-
isV2 := cgroups.Mode() == cgroups.Unified
1498-
var path string
1499-
if isV2 {
1500-
path = filepath.Join(cgroupMountPoint, cgroupPath, "cpu.max")
1501-
} else {
1502-
path = filepath.Join(cgroupMountPoint, "cpu", cgroupPath, "cpu.cfs_quota_us")
1503-
}
1504-
data, err := os.ReadFile(path)
1505-
if err != nil {
1506-
return nil, err
1507-
}
1508-
1509-
arr := strings.Split(strings.Trim(string(data), "\n"), " ")
1510-
if len(arr) == 0 {
1511-
return nil, errors.New("unexpected cgroup data")
1512-
}
1513-
quota, err := strconv.ParseUint(arr[0], 10, 64)
1514-
if err != nil {
1515-
return nil, err
1516-
}
1517-
cpu := vmv1.MilliCPU(uint32(quota * 1000 / cgroupPeriod))
1518-
cpu /= cpuLimitOvercommitFactor
1519-
return &cpu, nil
1520-
}
1521-
15221481
func terminateQemuOnSigterm(ctx context.Context, logger *zap.Logger, wg *sync.WaitGroup) {
15231482
logger = logger.Named("terminate-qemu-on-sigterm")
15241483

neonvm/apis/neonvm/v1/virtualmachine_types.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,9 @@ const (
5757
// that the VM should use QMP to scale CPUs.
5858
CpuScalingModeQMP string = "qmpScaling"
5959

60-
// CpuScalingModeCpuSysfsState is the value of the VirtualMachineSpec.CpuScalingMode field that
60+
// CpuScalingModeSysfs is the value of the VirtualMachineSpec.CpuScalingMode field that
6161
// indicates that the VM should use the CPU sysfs state interface to scale CPUs.
62-
CpuScalingModeCpuSysfsState string = "sysfsScaling"
62+
CpuScalingModeSysfs string = "sysfsScaling"
6363
)
6464

6565
// VirtualMachineUsage provides information about a VM's current usage. This is the type of the

pkg/neonvm/controllers/config.go

+3
Original file line numberDiff line numberDiff line change
@@ -50,4 +50,7 @@ type ReconcilerConfig struct {
5050
// FailingRefreshInterval is the interval between consecutive
5151
// updates of metrics and logs, related to failing reconciliations
5252
FailingRefreshInterval time.Duration
53+
54+
// DefaultCPUScalingMode is the default CPU scaling mode that will be used for VMs with empty spec.cpuScalingMode
55+
DefaultCPUScalingMode string
5356
}

pkg/neonvm/controllers/functests/vm_controller_test.go

+1
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ var _ = Describe("VirtualMachine controller", func() {
115115
MemhpAutoMovableRatio: "301",
116116
FailurePendingPeriod: 1 * time.Minute,
117117
FailingRefreshInterval: 1 * time.Minute,
118+
DefaultCPUScalingMode: vmv1.CpuScalingModeQMP,
118119
},
119120
}
120121

pkg/neonvm/controllers/vm_controller.go

+25-9
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,8 @@ func (r *VMReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Re
166166

167167
// examine cpuScalingMode and set it to the default value if it is not set
168168
if vm.Spec.CpuScalingMode == nil || *vm.Spec.CpuScalingMode == "" {
169-
vm.Spec.CpuScalingMode = lo.ToPtr(vmv1.CpuScalingModeQMP)
169+
log.Info("Setting default CPU scaling mode", "default", r.Config.DefaultCPUScalingMode)
170+
vm.Spec.CpuScalingMode = lo.ToPtr(r.Config.DefaultCPUScalingMode)
170171
if err := r.tryUpdateVM(ctx, &vm); err != nil {
171172
log.Error(err, "Failed to set default CPU scaling mode")
172173
return ctrl.Result{}, err
@@ -309,10 +310,10 @@ func (r *VMReconciler) updateVMStatusCPU(
309310

310311
func (r *VMReconciler) updateVMStatusMemory(
311312
vm *vmv1.VirtualMachine,
312-
QmpMemorySize *resource.Quantity,
313+
qmpMemorySize *resource.Quantity,
313314
) {
314-
if vm.Status.MemorySize == nil || !QmpMemorySize.Equal(*vm.Status.MemorySize) {
315-
vm.Status.MemorySize = QmpMemorySize
315+
if vm.Status.MemorySize == nil || !qmpMemorySize.Equal(*vm.Status.MemorySize) {
316+
vm.Status.MemorySize = qmpMemorySize
316317
r.Recorder.Event(vm, "Normal", "MemoryInfo",
317318
fmt.Sprintf("VirtualMachine %s uses %v memory",
318319
vm.Name,
@@ -555,18 +556,31 @@ func (r *VMReconciler) doReconcile(ctx context.Context, vm *vmv1.VirtualMachine)
555556
return err
556557
}
557558
var pluggedCPU uint32
558-
if vm.Spec.CpuScalingMode != nil && *vm.Spec.CpuScalingMode == vmv1.CpuScalingModeCpuSysfsState {
559+
560+
if vm.Spec.CpuScalingMode == nil { // should not happen
561+
err := fmt.Errorf("CPU scaling mode is not set")
562+
log.Error(err, "Unknown CPU scaling mode", "VirtualMachine", vm.Name)
563+
return err
564+
}
565+
566+
switch *vm.Spec.CpuScalingMode {
567+
case vmv1.CpuScalingModeSysfs:
559568
log.Info("CPU scaling mode is set to CpuSysfsState, CPU usage check based on cgroups")
560569
pluggedCPU = cgroupUsage.VCPUs.RoundedUp()
561-
} else {
562-
// get CPU details from QEMU
570+
case vmv1.CpuScalingModeQMP:
571+
log.Info("CPU scaling mode is set to QMP, CPU usage check based on QMP")
563572
cpuSlotsPlugged, _, err := QmpGetCpus(QmpAddr(vm))
564573
if err != nil {
565574
log.Error(err, "Failed to get CPU details from VirtualMachine", "VirtualMachine", vm.Name)
566575
return err
567576
}
568577
pluggedCPU = uint32(len(cpuSlotsPlugged))
578+
default:
579+
err := fmt.Errorf("unsupported CPU scaling mode: %s", *vm.Spec.CpuScalingMode)
580+
log.Error(err, "Unknown CPU scaling mode", "VirtualMachine", vm.Name, "CPU scaling mode", *vm.Spec.CpuScalingMode)
581+
return err
569582
}
583+
570584
// update status by CPUs used in the VM
571585
r.updateVMStatusCPU(ctx, vm, vmRunner, pluggedCPU, cgroupUsage)
572586

@@ -1400,10 +1414,9 @@ func podSpec(
14001414
Command: func() []string {
14011415
cmd := []string{"runner"}
14021416
if config.DisableRunnerCgroup {
1403-
cmd = append(cmd, "-skip-cgroup-management")
14041417
// cgroup management disabled, but we still need something to provide
14051418
// the server, so the runner will just provide a dummy implementation.
1406-
cmd = append(cmd, "-enable-dummy-cpu-server")
1419+
cmd = append(cmd, "-skip-cgroup-management")
14071420
}
14081421
cmd = append(
14091422
cmd,
@@ -1420,6 +1433,9 @@ func podSpec(
14201433
"-vmspec", base64.StdEncoding.EncodeToString(vmSpecJson),
14211434
"-vmstatus", base64.StdEncoding.EncodeToString(vmStatusJson),
14221435
)
1436+
// NB: We don't need to check if the value is nil because the default value
1437+
// was set in Reconcile
1438+
cmd = append(cmd, "-cpu-scaling-mode", *vm.Spec.CpuScalingMode)
14231439
return cmd
14241440
}(),
14251441
Env: []corev1.EnvVar{{

pkg/neonvm/controllers/vm_controller_handle_cpu_scaling.go → pkg/neonvm/controllers/vm_controller_cpu_scaling.go

+6-6
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,16 @@ func (r *VMReconciler) handleCPUScaling(ctx context.Context, vm *vmv1.VirtualMac
1919

2020
log := log.FromContext(ctx)
2121
useCpuSysfsStateScaling := false
22-
if vm.Spec.CpuScalingMode != nil && *vm.Spec.CpuScalingMode == vmv1.CpuScalingModeCpuSysfsState {
22+
if vm.Spec.CpuScalingMode != nil && *vm.Spec.CpuScalingMode == vmv1.CpuScalingModeSysfs {
2323
useCpuSysfsStateScaling = true
2424
}
2525

2626
var scaled bool
2727
var err error
2828
if !useCpuSysfsStateScaling {
29-
scaled, err = r.handleQMPBasedCPUScaling(ctx, vm, vmRunner)
29+
scaled, err = r.handleCPUScalingQMP(ctx, vm, vmRunner)
3030
} else {
31-
scaled, err = r.delegateScalingToNeonvmDaemon(ctx, vm, vmRunner)
31+
scaled, err = r.handleCPUScalingSysfs(ctx, vm, vmRunner)
3232
}
3333

3434
if err != nil {
@@ -39,8 +39,8 @@ func (r *VMReconciler) handleCPUScaling(ctx context.Context, vm *vmv1.VirtualMac
3939
return scaled, nil
4040
}
4141

42-
// handleQMPBasedCPUScaling handles CPU scaling using QMP, extracted as is from doReconcile
43-
func (r *VMReconciler) handleQMPBasedCPUScaling(ctx context.Context, vm *vmv1.VirtualMachine, vmRunner *corev1.Pod) (bool, error) {
42+
// handleCPUScalingQMP handles CPU scaling using QMP, extracted as is from doReconcile
43+
func (r *VMReconciler) handleCPUScalingQMP(ctx context.Context, vm *vmv1.VirtualMachine, vmRunner *corev1.Pod) (bool, error) {
4444
log := log.FromContext(ctx)
4545
specCPU := vm.Spec.Guest.CPUs.Use
4646
cgroupUsage, err := getRunnerCgroup(ctx, vm)
@@ -91,7 +91,7 @@ func (r *VMReconciler) handleQMPBasedCPUScaling(ctx context.Context, vm *vmv1.Vi
9191
return hotPlugCPUScaled, nil
9292
}
9393

94-
func (r *VMReconciler) delegateScalingToNeonvmDaemon(ctx context.Context, vm *vmv1.VirtualMachine, vmRunner *corev1.Pod) (bool, error) {
94+
func (r *VMReconciler) handleCPUScalingSysfs(ctx context.Context, vm *vmv1.VirtualMachine, vmRunner *corev1.Pod) (bool, error) {
9595
log := log.FromContext(ctx)
9696
specCPU := vm.Spec.Guest.CPUs.Use
9797

pkg/neonvm/controllers/vm_controller_unit_test.go

+1
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ func newTestParams(t *testing.T) *testParams {
123123
MemhpAutoMovableRatio: "301",
124124
FailurePendingPeriod: time.Minute,
125125
FailingRefreshInterval: time.Minute,
126+
DefaultCPUScalingMode: vmv1.CpuScalingModeQMP,
126127
},
127128
Metrics: reconcilerMetrics,
128129
}

0 commit comments

Comments
 (0)