Skip to content

Commit

Permalink
neonvm: handle sysfs scaling mode in migrations
Browse files Browse the repository at this point in the history
Add a separate phase-handling step for the running-migration phase to call
neonvm-daemon, which is not accessible during the pre-migration phase

Signed-off-by: Mikhail Sakhnov <[email protected]>
  • Loading branch information
mikhail-sakhnov committed Jan 14, 2025
1 parent 99bf327 commit 73b56a9
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 9 deletions.
4 changes: 2 additions & 2 deletions pkg/neonvm/controllers/runner_cpu_limits.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ import (
"github.com/neondatabase/autoscaling/pkg/api"
)

func setRunnerCPULimits(ctx context.Context, vm *vmv1.VirtualMachine, cpu vmv1.MilliCPU) error {
func setRunnerCPULimits(ctx context.Context, vm *vmv1.VirtualMachine, targetPodIP string, cpu vmv1.MilliCPU) error {
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
defer cancel()

url := fmt.Sprintf("http://%s:%d/cpu_change", vm.Status.PodIP, vm.Spec.RunnerPort)
url := fmt.Sprintf("http://%s:%d/cpu_change", targetPodIP, vm.Spec.RunnerPort)

update := api.VCPUChange{VCPUs: cpu}

Expand Down
2 changes: 1 addition & 1 deletion pkg/neonvm/controllers/vm_controller_cpu_scaling.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ func (r *VMReconciler) handleCPUScalingSysfs(ctx context.Context, vm *vmv1.Virtu

func (r *VMReconciler) handleCgroupCPUUpdate(ctx context.Context, vm *vmv1.VirtualMachine, cgroupUsage *api.VCPUCgroup) (bool, error) {
specCPU := vm.Spec.Guest.CPUs.Use
if err := setRunnerCPULimits(ctx, vm, specCPU); err != nil {
if err := setRunnerCPULimits(ctx, vm, vm.Status.PodIP, specCPU); err != nil {
return false, err
}
reason := "ScaleDown"
Expand Down
29 changes: 23 additions & 6 deletions pkg/neonvm/controllers/vmmigration_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -309,11 +309,16 @@ func (r *VirtualMachineMigrationReconciler) Reconcile(ctx context.Context, req c
migration.Status.SourcePodIP = vm.Status.PodIP
migration.Status.TargetPodIP = targetRunner.Status.PodIP

// do hotplugCPU in targetRunner before migration
// do cpu hot plug in targetRunner before migration
// in case of QMP mode, we need to sync CPUs before migration
// in case of Sysfs mode, we need to sync CPUs during migration
log.Info("Syncing CPUs in Target runner", "TargetPod.Name", migration.Status.TargetPodName)
if err := QmpSyncCpuToTarget(vm, migration); err != nil {
return ctrl.Result{}, err
if *vm.Spec.CpuScalingMode == vmv1.CpuScalingModeQMP {
if err := QmpSyncCpuToTarget(vm, migration); err != nil {
return ctrl.Result{}, err
}
}

log.Info("CPUs in Target runner synced", "TargetPod.Name", migration.Status.TargetPodName)

// do hotplug Memory in targetRunner -- only needed for dimm slots; virtio-mem Just Works™
Expand All @@ -334,8 +339,8 @@ func (r *VirtualMachineMigrationReconciler) Reconcile(ctx context.Context, req c
panic(fmt.Errorf("unexpected vm.status.memoryProvider %q", *vm.Status.MemoryProvider))
}

// Migrate only running VMs to target with plugged devices
if vm.Status.Phase == vmv1.VmPreMigrating {
switch vm.Status.Phase {
case vmv1.VmPreMigrating:
// update VM status
vm.Status.Phase = vmv1.VmMigrating
if err := r.Status().Update(ctx, vm); err != nil {
Expand All @@ -357,10 +362,22 @@ func (r *VirtualMachineMigrationReconciler) Reconcile(ctx context.Context, req c
Reason: "Reconciling",
Message: message,
})
// finally update migration phase to Running
return r.updateMigrationStatus(ctx, migration)
case vmv1.VmMigrating:
// migration is in progress so we can scale CPU using sysfs
if *vm.Spec.CpuScalingMode == vmv1.CpuScalingModeSysfs {
if err := setRunnerCPULimits(ctx,
vm,
targetRunner.Status.PodIP,
vm.Spec.Guest.CPUs.Use); err != nil {
return ctrl.Result{}, err
}
}
// in either scaling mode, the migration is underway at this point, so mark the migration phase as Running
migration.Status.Phase = vmv1.VmmRunning
return r.updateMigrationStatus(ctx, migration)
}

case runnerSucceeded:
// target runner pod finished without error? but it shouldn't finish
message := fmt.Sprintf("Target Pod (%s) completed suddenly", targetRunner.Name)
Expand Down

0 comments on commit 73b56a9

Please sign in to comment.