Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DFBUGS-1701: [release-4.17] Delete the succeeded pods with duplicate tolerations to avoid alert #3064

Open
wants to merge 1 commit into
base: release-4.17
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions controllers/storagecluster/cephcluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,13 @@ var (
testSkipPrometheusRules = false
)

// deletedSucceededPodsWithDuplicateTolerations records whether the cleanup of
// succeeded pods with duplicate tolerations has already completed. It is a
// package-level flag, so it starts out false every time the process starts and
// is flipped to true after the cleanup finishes without error.
var deletedSucceededPodsWithDuplicateTolerations = false

// Values of the "app" label used to select the pods that the duplicate
// toleration cleanup inspects.
const (
	osdPrepareLabelSelector     = "rook-ceph-osd-prepare"
	osdKeyRotationLabelSelector = "rook-ceph-osd-key-rotation"
)

// arbiterEnabled reports the value of the Arbiter.Enable field in the spec of
// the given StorageCluster.
func arbiterEnabled(sc *ocsv1.StorageCluster) bool {
	arbiter := sc.Spec.Arbiter
	return arbiter.Enable
}
Expand All @@ -112,6 +119,24 @@ func (obj *ocsCephCluster) ensureCreated(r *StorageClusterReconciler, sc *ocsv1.
return reconcile.Result{}, fmt.Errorf("'StorageDeviceSets' should not be initialized in an external CephCluster")
}

// The deletion function needs to run only once successfully on the cluster
if !deletedSucceededPodsWithDuplicateTolerations && !sc.Spec.ExternalStorage.Enable {
// delete the osd-prepare job completed pods
err = r.deleteSucceededPodsWithDuplicateTolerations(map[string]string{"app": osdPrepareLabelSelector}, sc.Namespace)
if err != nil {
return reconcile.Result{}, err
}
// if cluster-wide encryption is enabled, also delete the osd-key-rotation cronjob completed pods (deviceSet-level encryption is not checked here)
if sc.Spec.Encryption.ClusterWide {
err = r.deleteSucceededPodsWithDuplicateTolerations(map[string]string{"app": osdKeyRotationLabelSelector}, sc.Namespace)
if err != nil {
return reconcile.Result{}, err
}
}
// after successful deletion set the value to true to check & prevent repeat rerun
deletedSucceededPodsWithDuplicateTolerations = true
}

for i, ds := range sc.Spec.StorageDeviceSets {
sc.Spec.StorageDeviceSets[i].Config.TuneSlowDeviceClass = false
sc.Spec.StorageDeviceSets[i].Config.TuneFastDeviceClass = false
Expand Down Expand Up @@ -1437,3 +1462,23 @@ func determineDefaultCephDeviceClass(foundDeviceClasses []rookCephv1.DeviceClass
}
return determinedDeviceClass
}

// deleteSucceededPodsWithDuplicateTolerations lists the pods in namespace that
// match labelSelector and deletes every pod that is in the Succeeded phase and
// whose toleration list contains a duplicate entry. Pods in any other phase,
// or without duplicate tolerations, are left untouched.
//
// The first error returned by the list call or by a delete call aborts the
// walk and is returned as-is; pods after the failing one are not processed.
func (r *StorageClusterReconciler) deleteSucceededPodsWithDuplicateTolerations(labelSelector map[string]string, namespace string) error {
	pods, err := statusutil.GetPodsWithLabels(r.ctx, r.Client, namespace, labelSelector)
	if err != nil {
		return err
	}

	for i := range pods.Items {
		// Work on the slice element directly instead of a per-iteration copy.
		candidate := &pods.Items[i]

		if candidate.Status.Phase != corev1.PodSucceeded {
			continue
		}
		if !statusutil.HasDuplicateTolerations(candidate.Spec.Tolerations) {
			continue
		}

		r.Log.Info("Deleting pod with duplicate tolerations", "pod", candidate.Name)
		if delErr := r.Client.Delete(r.ctx, candidate); delErr != nil {
			return delErr
		}
	}
	return nil
}
15 changes: 15 additions & 0 deletions controllers/util/k8sutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,21 @@ func GetPodsWithLabels(ctx context.Context, kubeClient client.Client, namespace
return podList, nil
}

// HasDuplicateTolerations returns true if the list contains two tolerations
// that are equal by value.
//
// corev1.Toleration carries TolerationSeconds as a *int64. Using the struct
// directly as a map key would compare that pointer by address, so two
// otherwise identical tolerations whose TolerationSeconds point at equal
// values stored in different locations would not be recognised as duplicates.
// To compare by value, the pointer is cleared from the key and replaced by the
// pair (hasSeconds, seconds).
func HasDuplicateTolerations(tolerations []corev1.Toleration) bool {
	if len(tolerations) < 2 {
		return false
	}

	// tolerationKey is the by-value identity of a toleration.
	type tolerationKey struct {
		toleration corev1.Toleration // copy with TolerationSeconds set to nil
		hasSeconds bool              // whether TolerationSeconds was non-nil
		seconds    int64             // pointed-to value when hasSeconds is true
	}

	seen := make(map[tolerationKey]struct{}, len(tolerations))
	for _, toleration := range tolerations {
		key := tolerationKey{toleration: toleration}
		if toleration.TolerationSeconds != nil {
			key.hasSeconds = true
			key.seconds = *toleration.TolerationSeconds
		}
		// Drop the pointer so that its address never takes part in equality.
		key.toleration.TolerationSeconds = nil

		if _, found := seen[key]; found {
			return true
		}
		seen[key] = struct{}{}
	}
	return false
}

// GetStorageClassWithName returns the storage class object by name
func GetStorageClassWithName(ctx context.Context, kubeClient client.Client, name string) *storagev1.StorageClass {
sc := &storagev1.StorageClass{}
Expand Down
Loading