Skip to content

Commit

Permalink
update object watcher metrics
Browse files Browse the repository at this point in the history
Signed-off-by: zach593 <[email protected]>
  • Loading branch information
zach593 committed Feb 23, 2025
1 parent a5ca5cc commit 2ae10d5
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 25 deletions.
9 changes: 7 additions & 2 deletions pkg/controllers/execution/execution_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,9 @@ func (c *Controller) cleanupPolicyClaimMetadata(ctx context.Context, work *workv
util.RemoveLabels(workload, util.ManagedResourceLabels...)
util.RemoveAnnotations(workload, util.ManagedResourceAnnotations...)

if err := c.ObjectWatcher.Update(ctx, cluster.Name, workload, clusterObj); err != nil {
updated, err := c.ObjectWatcher.Update(ctx, cluster.Name, workload, clusterObj)
metrics.CountUpdateResourceToCluster(err, workload.GetAPIVersion(), workload.GetKind(), cluster.Name, updated)
if err != nil {
klog.Errorf("Failed to update metadata in the given member cluster %v, err is %v", cluster.Name, err)
return err
}
Expand All @@ -232,6 +234,7 @@ func (c *Controller) tryDeleteWorkload(ctx context.Context, clusterName string,
}

err = c.ObjectWatcher.Delete(ctx, clusterName, workload)
metrics.CountDeleteResourceFromCluster(err, workload.GetAPIVersion(), workload.GetKind(), clusterName)
if err != nil {
klog.Errorf("Failed to delete resource in the given member cluster %v, err is %v", clusterName, err)
return err
Expand Down Expand Up @@ -312,13 +315,15 @@ func (c *Controller) tryCreateOrUpdateWorkload(ctx context.Context, clusterName
return err
}
err = c.ObjectWatcher.Create(ctx, clusterName, workload)
metrics.CountCreateResourceToCluster(err, workload.GetAPIVersion(), workload.GetKind(), clusterName, false)
if err != nil {
return err
}
return nil
}

err = c.ObjectWatcher.Update(ctx, clusterName, workload, clusterObj)
updated, err := c.ObjectWatcher.Update(ctx, clusterName, workload, clusterObj)
metrics.CountUpdateResourceToCluster(err, workload.GetAPIVersion(), workload.GetKind(), clusterName, updated)
if err != nil {
return err
}
Expand Down
6 changes: 3 additions & 3 deletions pkg/controllers/status/work_status_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -255,8 +255,8 @@ func (c *WorkStatusController) updateResource(ctx context.Context, observedObj *
}

if needUpdate {
updateErr := c.ObjectWatcher.Update(ctx, clusterName, desiredObj, observedObj)
metrics.CountUpdateResourceToCluster(updateErr, desiredObj.GetAPIVersion(), desiredObj.GetKind(), clusterName)
updated, updateErr := c.ObjectWatcher.Update(ctx, clusterName, desiredObj, observedObj)
metrics.CountUpdateResourceToCluster(err, desiredObj.GetAPIVersion(), desiredObj.GetKind(), clusterName, updated)
if updateErr != nil {
klog.Errorf("Updating %s failed: %v", fedKey.String(), updateErr)
return updateErr
Expand Down Expand Up @@ -299,7 +299,6 @@ func (c *WorkStatusController) handleDeleteEvent(ctx context.Context, key keys.F
}

reCreateErr := c.recreateResourceIfNeeded(ctx, work, key)
metrics.CountRecreateResourceToCluster(reCreateErr, key.GroupVersion().String(), key.Kind, key.Cluster)
if reCreateErr != nil {
c.updateAppliedCondition(ctx, work, metav1.ConditionFalse, "ReCreateFailed", reCreateErr.Error())
return reCreateErr
Expand All @@ -321,6 +320,7 @@ func (c *WorkStatusController) recreateResourceIfNeeded(ctx context.Context, wor
manifest.GetName() == workloadKey.Name {
klog.Infof("Recreating resource(%s).", workloadKey.String())
err := c.ObjectWatcher.Create(ctx, workloadKey.Cluster, manifest)
metrics.CountCreateResourceToCluster(err, workloadKey.GroupVersion().String(), workloadKey.Kind, workloadKey.Cluster, true)
if err != nil {
c.eventf(manifest, corev1.EventTypeWarning, events.EventReasonSyncWorkloadFailed, "Failed to create or update resource(%s/%s) in member cluster(%s): %v", manifest.GetNamespace(), manifest.GetName(), workloadKey.Cluster, err)
return err
Expand Down
37 changes: 25 additions & 12 deletions pkg/metrics/resource.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ limitations under the License.
package metrics

import (
"fmt"
"time"

"github.com/prometheus/client_golang/prometheus"
Expand All @@ -30,8 +31,9 @@ const (
policyApplyAttemptsMetricsName = "policy_apply_attempts_total"
syncWorkDurationMetricsName = "binding_sync_work_duration_seconds"
syncWorkloadDurationMetricsName = "work_sync_workload_duration_seconds"
recreateResourceToCluster = "recreate_resource_to_cluster"
createResourceToCluster = "create_resource_to_cluster"
updateResourceToCluster = "update_resource_to_cluster"
deleteResourceFromCluster = "delete_resource_from_cluster"
policyPreemptionMetricsName = "policy_preemption_total"
cronFederatedHPADurationMetricsName = "cronfederatedhpa_process_duration_seconds"
cronFederatedHPARuleDurationMetricsName = "cronfederatedhpa_rule_process_duration_seconds"
Expand Down Expand Up @@ -69,14 +71,19 @@ var (
Buckets: prometheus.ExponentialBuckets(0.001, 2, 12),
}, []string{"result"})

recreateResourceWhenSyncWorkStatus = prometheus.NewCounterVec(prometheus.CounterOpts{
Name: recreateResourceToCluster,
createResourceWhenSyncWork = prometheus.NewCounterVec(prometheus.CounterOpts{
Name: createResourceToCluster,
Help: "Number of recreating operation of the resource to a target member cluster. By the result, 'error' means a resource recreated failed. Otherwise 'success'. Cluster means the target member cluster.",
}, []string{"result", "apiversion", "kind", "cluster"})
}, []string{"result", "apiversion", "kind", "cluster", "recreate"})

updateResourceWhenSyncWorkStatus = prometheus.NewCounterVec(prometheus.CounterOpts{
updateResourceWhenSyncWork = prometheus.NewCounterVec(prometheus.CounterOpts{
Name: updateResourceToCluster,
Help: "Number of updating operation of the resource to a target member cluster. By the result, 'error' means a resource updated failed. Otherwise 'success'. Cluster means the target member cluster.",
}, []string{"result", "apiversion", "kind", "cluster", "updated"})

deleteResourceWhenSyncWork = prometheus.NewCounterVec(prometheus.CounterOpts{
Name: deleteResourceFromCluster,
Help: "Number of deleting operation of the resource from a target member cluster. By the result, 'error' means a resource updated failed. Otherwise 'success'. Cluster means the target member cluster.",
}, []string{"result", "apiversion", "kind", "cluster"})

policyPreemptionCounter = prometheus.NewCounterVec(prometheus.CounterOpts{
Expand Down Expand Up @@ -130,14 +137,19 @@ func ObserveSyncWorkloadLatency(err error, start time.Time) {
syncWorkloadDurationHistogram.WithLabelValues(utilmetrics.GetResultByError(err)).Observe(utilmetrics.DurationInSeconds(start))
}

// CountRecreateResourceToCluster records the number of recreating operation of the resource to a target member cluster.
func CountRecreateResourceToCluster(err error, apiVersion, kind, cluster string) {
recreateResourceWhenSyncWorkStatus.WithLabelValues(utilmetrics.GetResultByError(err), apiVersion, kind, cluster).Inc()
// CountCreateResourceToCluster records the number of recreating operation of the resource to a target member cluster.
func CountCreateResourceToCluster(err error, apiVersion, kind, cluster string, recreate bool) {
createResourceWhenSyncWork.WithLabelValues(utilmetrics.GetResultByError(err), apiVersion, kind, cluster, fmt.Sprint(recreate)).Inc()
}

// CountUpdateResourceToCluster records the number of updating operation of the resource to a target member cluster.
func CountUpdateResourceToCluster(err error, apiVersion, kind, cluster string) {
updateResourceWhenSyncWorkStatus.WithLabelValues(utilmetrics.GetResultByError(err), apiVersion, kind, cluster).Inc()
func CountUpdateResourceToCluster(err error, apiVersion, kind, cluster string, updated bool) {
updateResourceWhenSyncWork.WithLabelValues(utilmetrics.GetResultByError(err), apiVersion, kind, cluster, fmt.Sprint(updated)).Inc()
}

// CountDeleteResourceFromCluster records the number of updating operation of the resource to a target member cluster.
func CountDeleteResourceFromCluster(err error, apiVersion, kind, cluster string) {
deleteResourceWhenSyncWork.WithLabelValues(utilmetrics.GetResultByError(err), apiVersion, kind, cluster).Inc()
}

// CountPolicyPreemption records the numbers of policy preemption.
Expand Down Expand Up @@ -173,8 +185,9 @@ func ResourceCollectors() []prometheus.Collector {
policyApplyAttempts,
syncWorkDurationHistogram,
syncWorkloadDurationHistogram,
recreateResourceWhenSyncWorkStatus,
updateResourceWhenSyncWorkStatus,
createResourceWhenSyncWork,
updateResourceWhenSyncWork,
deleteResourceWhenSyncWork,
policyPreemptionCounter,
cronFederatedHPADurationHistogram,
cronFederatedHPARuleDurationHistogram,
Expand Down
16 changes: 8 additions & 8 deletions pkg/util/objectwatcher/objectwatcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ import (
// ObjectWatcher manages operations for object dispatched to member clusters.
type ObjectWatcher interface {
Create(ctx context.Context, clusterName string, desireObj *unstructured.Unstructured) error
Update(ctx context.Context, clusterName string, desireObj, clusterObj *unstructured.Unstructured) error
Update(ctx context.Context, clusterName string, desireObj, clusterObj *unstructured.Unstructured) (bool, error)
Delete(ctx context.Context, clusterName string, desireObj *unstructured.Unstructured) error
NeedsUpdate(clusterName string, desiredObj, clusterObj *unstructured.Unstructured) (bool, error)
}
Expand Down Expand Up @@ -138,43 +138,43 @@ func (o *objectWatcherImpl) retainClusterFields(desired, observed *unstructured.
return desired, nil
}

func (o *objectWatcherImpl) Update(ctx context.Context, clusterName string, desireObj, clusterObj *unstructured.Unstructured) error {
func (o *objectWatcherImpl) Update(ctx context.Context, clusterName string, desireObj, clusterObj *unstructured.Unstructured) (bool, error) {
updateAllowed := o.allowUpdate(clusterName, desireObj, clusterObj)
if !updateAllowed {
// The existing resource is not managed by Karmada, and no conflict resolution found, avoid updating the existing resource by default.
return fmt.Errorf("resource(kind=%s, %s/%s) already exists in the cluster %v and the %s strategy value is empty, Karmada will not manage this resource",
return false, fmt.Errorf("resource(kind=%s, %s/%s) already exists in the cluster %v and the %s strategy value is empty, Karmada will not manage this resource",
desireObj.GetKind(), desireObj.GetNamespace(), desireObj.GetName(), clusterName, workv1alpha2.ResourceConflictResolutionAnnotation)
}

dynamicClusterClient, err := o.ClusterClientSetFunc(clusterName, o.KubeClientSet)
if err != nil {
klog.Errorf("Failed to build dynamic cluster client for cluster %s, err: %v.", clusterName, err)
return err
return false, err
}

gvr, err := restmapper.GetGroupVersionResource(o.RESTMapper, desireObj.GroupVersionKind())
if err != nil {
klog.Errorf("Failed to update the resource(kind=%s, %s/%s) in the cluster %s as mapping GVK to GVR failed: %v", desireObj.GetKind(), desireObj.GetNamespace(), desireObj.GetName(), clusterName, err)
return err
return false, err
}

desireObj, err = o.retainClusterFields(desireObj, clusterObj)
if err != nil {
klog.Errorf("Failed to retain fields for resource(kind=%s, %s/%s) in cluster %s: %v", clusterObj.GetKind(), clusterObj.GetNamespace(), clusterObj.GetName(), clusterName, err)
return err
return false, err
}

resource, err := dynamicClusterClient.DynamicClientSet.Resource(gvr).Namespace(desireObj.GetNamespace()).Update(ctx, desireObj, metav1.UpdateOptions{})
if err != nil {
klog.Errorf("Failed to update resource(kind=%s, %s/%s) in cluster %s, err: %v.", desireObj.GetKind(), desireObj.GetNamespace(), desireObj.GetName(), clusterName, err)
return err
return false, err
}

klog.Infof("Updated the resource(kind=%s, %s/%s) on cluster(%s).", desireObj.GetKind(), desireObj.GetNamespace(), desireObj.GetName(), clusterName)

// record version
o.recordVersion(resource, clusterName)
return nil
return true, nil
}

func (o *objectWatcherImpl) Delete(ctx context.Context, clusterName string, desireObj *unstructured.Unstructured) error {
Expand Down

0 comments on commit 2ae10d5

Please sign in to comment.