diff --git a/pkg/neonvm/controllers/runner_cpu_limits.go b/pkg/neonvm/controllers/runner_cpu_limits.go
index e0b45e34c..e55590895 100644
--- a/pkg/neonvm/controllers/runner_cpu_limits.go
+++ b/pkg/neonvm/controllers/runner_cpu_limits.go
@@ -13,11 +13,11 @@ import (
 	"github.com/neondatabase/autoscaling/pkg/api"
 )
 
-func setRunnerCPULimits(ctx context.Context, vm *vmv1.VirtualMachine, cpu vmv1.MilliCPU) error {
+func setRunnerCPULimits(ctx context.Context, vm *vmv1.VirtualMachine, targetPodIP string, cpu vmv1.MilliCPU) error {
 	ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
 	defer cancel()
 
-	url := fmt.Sprintf("http://%s:%d/cpu_change", vm.Status.PodIP, vm.Spec.RunnerPort)
+	url := fmt.Sprintf("http://%s:%d/cpu_change", targetPodIP, vm.Spec.RunnerPort)
 
 	update := api.VCPUChange{VCPUs: cpu}
diff --git a/pkg/neonvm/controllers/vm_controller_cpu_scaling.go b/pkg/neonvm/controllers/vm_controller_cpu_scaling.go
index 9266ae820..b01007802 100644
--- a/pkg/neonvm/controllers/vm_controller_cpu_scaling.go
+++ b/pkg/neonvm/controllers/vm_controller_cpu_scaling.go
@@ -112,7 +112,7 @@ func (r *VMReconciler) handleCPUScalingSysfs(ctx context.Context, vm *vmv1.Virtu
 func (r *VMReconciler) handleCgroupCPUUpdate(ctx context.Context, vm *vmv1.VirtualMachine, cgroupUsage *api.VCPUCgroup) (bool, error) {
 	specCPU := vm.Spec.Guest.CPUs.Use
-	if err := setRunnerCPULimits(ctx, vm, specCPU); err != nil {
+	if err := setRunnerCPULimits(ctx, vm, vm.Status.PodIP, specCPU); err != nil {
 		return false, err
 	}
 
 	reason := "ScaleDown"
diff --git a/pkg/neonvm/controllers/vmmigration_controller.go b/pkg/neonvm/controllers/vmmigration_controller.go
index d0fca00cc..bafe9fd08 100644
--- a/pkg/neonvm/controllers/vmmigration_controller.go
+++ b/pkg/neonvm/controllers/vmmigration_controller.go
@@ -309,11 +309,16 @@ func (r *VirtualMachineMigrationReconciler) Reconcile(ctx context.Context, req c
 		migration.Status.SourcePodIP = vm.Status.PodIP
 		migration.Status.TargetPodIP = targetRunner.Status.PodIP
 
-		// do hotplugCPU in targetRunner before migration
+		// sync CPUs to the targetRunner:
+		// in QMP mode, CPUs must be hot plugged before the migration starts;
+		// in sysfs mode, CPUs are synced while the migration is in progress
 		log.Info("Syncing CPUs in Target runner", "TargetPod.Name", migration.Status.TargetPodName)
-		if err := QmpSyncCpuToTarget(vm, migration); err != nil {
-			return ctrl.Result{}, err
+		if *vm.Spec.CpuScalingMode == vmv1.CpuScalingModeQMP {
+			if err := QmpSyncCpuToTarget(vm, migration); err != nil {
+				return ctrl.Result{}, err
+			}
 		}
+		log.Info("CPUs in Target runner synced", "TargetPod.Name", migration.Status.TargetPodName)
 
 		// do hotplug Memory in targetRunner -- only needed for dimm slots; virtio-mem Just Works™
@@ -334,8 +339,8 @@ func (r *VirtualMachineMigrationReconciler) Reconcile(ctx context.Context, req c
 			panic(fmt.Errorf("unexpected vm.status.memoryProvider %q", *vm.Status.MemoryProvider))
 		}
 
-		// Migrate only running VMs to target with plugged devices
-		if vm.Status.Phase == vmv1.VmPreMigrating {
+		switch vm.Status.Phase {
+		case vmv1.VmPreMigrating:
 			// update VM status
 			vm.Status.Phase = vmv1.VmMigrating
 			if err := r.Status().Update(ctx, vm); err != nil {
@@ -357,10 +362,22 @@ func (r *VirtualMachineMigrationReconciler) Reconcile(ctx context.Context, req c
 				Reason:  "Reconciling",
 				Message: message,
 			})
-			// finally update migration phase to Running
+			return r.updateMigrationStatus(ctx, migration)
+		case vmv1.VmMigrating:
+			// the migration is in progress, so the target runner's CPUs can now be scaled via sysfs
+			if *vm.Spec.CpuScalingMode == vmv1.CpuScalingModeSysfs {
+				if err := setRunnerCPULimits(ctx,
+					vm,
+					targetRunner.Status.PodIP,
+					vm.Spec.Guest.CPUs.Use); err != nil {
+					return ctrl.Result{}, err
+				}
+			}
+			// in either mode the target's CPUs are now synced, so the migration phase can move to Running
 			migration.Status.Phase = vmv1.VmmRunning
 			return r.updateMigrationStatus(ctx, migration)
 		}
+
 	case runnerSucceeded:
 		// target runner pod finished without error? but it shouldn't finish
 		message := fmt.Sprintf("Target Pod (%s) completed suddenly", targetRunner.Name)