From 15edd9ad5d5530bdebad9039917dddffedbe0729 Mon Sep 17 00:00:00 2001 From: Justyna Betkier Date: Tue, 31 Dec 2024 13:28:42 +0100 Subject: [PATCH] Improve events when the max total nodes of the cluster is reached. - log a cluster-wide event - the previous event would never get fired because the estimators would already cap the options they generate, and additionally it would fire only once while events are kept only for some time - log a per-pod event explaining why the scale-up is not triggered (previously the pod would either get a "no scale up because no matching group" event or no event at all) --- cluster-autoscaler/core/static_autoscaler.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cluster-autoscaler/core/static_autoscaler.go b/cluster-autoscaler/core/static_autoscaler.go index 0b075640b22f..8475c91e7be6 100644 --- a/cluster-autoscaler/core/static_autoscaler.go +++ b/cluster-autoscaler/core/static_autoscaler.go @@ -526,6 +526,12 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) caerrors.AutoscalerErr } else if a.MaxNodesTotal > 0 && len(readyNodes) >= a.MaxNodesTotal { scaleUpStatus.Result = status.ScaleUpNoOptionsAvailable klog.V(1).Infof("Max total nodes in cluster reached: %v. Current number of ready nodes: %v", a.MaxNodesTotal, len(readyNodes)) + autoscalingContext.LogRecorder.Eventf(apiv1.EventTypeWarning, "MaxNodesTotalReached", + "Max total nodes in cluster reached: %v", autoscalingContext.MaxNodesTotal) + for _, pod := range unschedulablePodsToHelp { + autoscalingContext.Recorder.Event(pod, apiv1.EventTypeNormal, "NotTriggerScaleUp", + fmt.Sprintf("pod didn't trigger scale-up: %s", "max total nodes in cluster reached")) + } } else if len(a.BypassedSchedulers) == 0 && allPodsAreNew(unschedulablePodsToHelp, currentTime) { // The assumption here is that these pods have been created very recently and probably there // is more pods to come. In theory we could check the newest pod time but then if pod were created