Skip to content

Commit

Permalink
Add ray.io/originated-from labels (ray-project#1830)
Browse files Browse the repository at this point in the history
  • Loading branch information
rueian authored Jan 23, 2024
1 parent 1fdf04c commit f05fa2e
Show file tree
Hide file tree
Showing 14 changed files with 47 additions and 44 deletions.
1 change: 0 additions & 1 deletion apiserver/pkg/util/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ const (
RayClusterUserLabelKey = "ray.io/user"
RayClusterVersionLabelKey = "ray.io/version"
RayClusterEnvironmentLabelKey = "ray.io/environment"
RayServiceLabelKey = "ray.io/service"
KubernetesApplicationNameLabelKey = "app.kubernetes.io/name"
KubernetesManagedByLabelKey = "app.kubernetes.io/managed-by"

Expand Down
2 changes: 0 additions & 2 deletions apiserver/pkg/util/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import (

api "github.com/ray-project/kuberay/proto/go_client"
rayv1api "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

Expand Down Expand Up @@ -38,7 +37,6 @@ func NewRayService(apiService *api.RayService, computeTemplateMap map[string]*ap

func buildRayServiceLabels(apiService *api.RayService) map[string]string {
labels := map[string]string{}
labels[RayServiceLabelKey] = apiService.Name
labels[RayClusterUserLabelKey] = apiService.User
labels[KubernetesApplicationNameLabelKey] = ApplicationName
labels[KubernetesManagedByLabelKey] = ComponentName
Expand Down
2 changes: 1 addition & 1 deletion ray-operator/controllers/ray/common/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -589,7 +589,7 @@ func setContainerEnvVars(pod *corev1.Pod, rayNodeType rayv1.RayNodeType, rayStar
container.Env = append(container.Env, portEnv)
}

if strings.ToLower(creator) == utils.RayServiceCreatorLabelValue {
if strings.EqualFold(creator, string(utils.RayServiceCRD)) {
// Only add this env for Ray Service cluster to improve service SLA.
if !envVarExists(utils.RAY_TIMEOUT_MS_TASK_WAIT_FOR_DEATH_INFO, container.Env) {
deathEnv := corev1.EnvVar{Name: utils.RAY_TIMEOUT_MS_TASK_WAIT_FOR_DEATH_INFO, Value: "0"}
Expand Down
2 changes: 1 addition & 1 deletion ray-operator/controllers/ray/common/pod_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -504,7 +504,7 @@ func TestBuildPod_WithCreatedByRayService(t *testing.T) {
cluster.Spec.EnableInTreeAutoscaling = &trueFlag
podName := strings.ToLower(cluster.Name + utils.DashSymbol + string(rayv1.HeadNode) + utils.DashSymbol + utils.FormatInt32(0))
podTemplateSpec := DefaultHeadPodTemplate(*cluster, cluster.Spec.HeadGroupSpec, podName, "6379")
pod := BuildPod(podTemplateSpec, rayv1.HeadNode, cluster.Spec.HeadGroupSpec.RayStartParams, "6379", &trueFlag, utils.RayServiceCreatorLabelValue, "", false)
pod := BuildPod(podTemplateSpec, rayv1.HeadNode, cluster.Spec.HeadGroupSpec.RayStartParams, "6379", &trueFlag, string(utils.RayServiceCRD), "", false)

hasCorrectDeathEnv := false
for _, container := range pod.Spec.Containers {
Expand Down
21 changes: 0 additions & 21 deletions ray-operator/controllers/ray/common/route.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,24 +61,3 @@ func BuildRouteForHeadService(cluster rayv1.RayCluster) (*routev1.Route, error)

return route, nil
}

// BuildRouteForRayService Builds the route for head service dashboard for RayService.
// This is used to expose dashboard for external traffic.
// RayService controller updates the ingress whenever a new RayCluster serves the traffic.
func BuildRouteForRayService(service rayv1.RayService, cluster rayv1.RayCluster) (*routev1.Route, error) {
route, err := BuildRouteForHeadService(cluster)
if err != nil {
return nil, err
}

serviceName, err := utils.GenerateHeadServiceName("RayService", cluster.Spec, cluster.Name)
if err != nil {
return nil, err
}
route.ObjectMeta.Name = serviceName
route.ObjectMeta.Namespace = service.Namespace
route.ObjectMeta.Labels[utils.RayServiceLabelKey] = service.Name
route.ObjectMeta.Labels[utils.RayIDLabelKey] = utils.CheckLabel(utils.GenerateIdentifier(service.Name, rayv1.HeadNode))

return route, nil
}
8 changes: 4 additions & 4 deletions ray-operator/controllers/ray/common/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,9 +143,9 @@ func BuildHeadServiceForRayService(rayService rayv1.RayService, rayCluster rayv1
service.ObjectMeta.Name = headSvcName
service.ObjectMeta.Namespace = rayService.Namespace
service.ObjectMeta.Labels = map[string]string{
utils.RayServiceLabelKey: rayService.Name,
utils.RayNodeTypeLabelKey: string(rayv1.HeadNode),
utils.RayIDLabelKey: utils.CheckLabel(utils.GenerateIdentifier(rayService.Name, rayv1.HeadNode)),
utils.RayOriginatedFromLabelKey: utils.RayOriginatedFromLabelValue(utils.RayServiceCRD, rayService.Name),
utils.RayNodeTypeLabelKey: string(rayv1.HeadNode),
utils.RayIDLabelKey: utils.CheckLabel(utils.GenerateIdentifier(rayService.Name, rayv1.HeadNode)),
}

return service, nil
Expand All @@ -171,7 +171,7 @@ func BuildServeService(rayService rayv1.RayService, rayCluster rayv1.RayCluster,
}

labels := map[string]string{
utils.RayServiceLabelKey: name,
utils.RayOriginatedFromLabelKey: utils.RayOriginatedFromLabelValue(utils.RayServiceCRD, name),
utils.RayClusterServingServiceLabelKey: utils.GenerateServeServiceLabel(name),
}
selectorLabels := map[string]string{
Expand Down
8 changes: 4 additions & 4 deletions ray-operator/controllers/ray/common/service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -435,8 +435,8 @@ func TestBuildServeServiceForRayService(t *testing.T) {
t.Fatalf("Expected `%v` but got `%v`", expectedResult, actualResult)
}

actualLabel := svc.Labels[utils.RayServiceLabelKey]
expectedLabel := string(serviceInstance.Name)
actualLabel := svc.Labels[utils.RayOriginatedFromLabelKey]
expectedLabel := utils.RayOriginatedFromLabelValue(utils.RayServiceCRD, serviceInstance.Name)
if !reflect.DeepEqual(expectedLabel, actualLabel) {
t.Fatalf("Expected `%v` but got `%v`", expectedLabel, actualLabel)
}
Expand All @@ -461,8 +461,8 @@ func TestBuildServeServiceForRayCluster(t *testing.T) {
t.Fatalf("Expected `%v` but got `%v`", expectedResult, actualResult)
}

actualLabel := svc.Labels[utils.RayServiceLabelKey]
expectedLabel := string(instanceForServeSvc.Name)
actualLabel := svc.Labels[utils.RayOriginatedFromLabelKey]
expectedLabel := utils.RayOriginatedFromLabelValue(utils.RayServiceCRD, instanceForServeSvc.Name)
if !reflect.DeepEqual(expectedLabel, actualLabel) {
t.Fatalf("Expected `%v` but got `%v`", expectedLabel, actualLabel)
}
Expand Down
8 changes: 7 additions & 1 deletion ray-operator/controllers/ray/rayjob_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,7 @@ func (r *RayJobReconciler) createNewK8sJob(ctx context.Context, rayJobInstance *
Name: rayJobInstance.Name,
Namespace: rayJobInstance.Namespace,
Labels: map[string]string{
utils.RayOriginatedFromLabelKey: utils.RayOriginatedFromLabelValue(utils.RayJobCRD, rayJobInstance.Name),
utils.KubernetesCreatedByLabelKey: utils.ComponentName,
},
},
Expand Down Expand Up @@ -572,9 +573,14 @@ func (r *RayJobReconciler) getOrCreateRayClusterInstance(ctx context.Context, ra
}

func (r *RayJobReconciler) constructRayClusterForRayJob(rayJobInstance *rayv1.RayJob, rayClusterName string) (*rayv1.RayCluster, error) {
labels := make(map[string]string, len(rayJobInstance.Labels))
for key, value := range rayJobInstance.Labels {
labels[key] = value
}
labels[utils.RayOriginatedFromLabelKey] = utils.RayOriginatedFromLabelValue(utils.RayJobCRD, rayJobInstance.Name)
rayCluster := &rayv1.RayCluster{
ObjectMeta: metav1.ObjectMeta{
Labels: rayJobInstance.Labels,
Labels: labels,
Annotations: rayJobInstance.Annotations,
Name: rayClusterName,
Namespace: rayJobInstance.Namespace,
Expand Down
1 change: 1 addition & 0 deletions ray-operator/controllers/ray/rayjob_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ var _ = Context("Inside the default namespace", func() {
Eventually(
getResourceFunc(ctx, client.ObjectKey{Name: myRayJob.Status.RayClusterName, Namespace: "default"}, myRayCluster),
time.Second*3, time.Millisecond*500).Should(BeNil(), "My myRayCluster = %v", myRayCluster.Name)
Expect(myRayCluster.Labels).Should(HaveKeyWithValue(utils.RayOriginatedFromLabelKey, utils.RayOriginatedFromLabelValue(utils.RayJobCRD, myRayJob.Name)))
})

It("Should create a number of workers equal to the replica setting", func() {
Expand Down
8 changes: 8 additions & 0 deletions ray-operator/controllers/ray/rayjob_controller_unit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,14 @@ func TestCreateK8sJobIfNeed(t *testing.T) {

err = rayJobReconciler.createK8sJobIfNeed(ctx, rayJob, rayCluster)
assert.NoError(t, err)

err = fakeClient.Get(ctx, types.NamespacedName{
Namespace: k8sJob.Namespace,
Name: k8sJob.Name,
}, k8sJob, nil)
assert.NoError(t, err)

assert.Equal(t, k8sJob.Labels[utils.RayOriginatedFromLabelKey], utils.RayOriginatedFromLabelValue(utils.RayJobCRD, rayJob.Name))
}

func TestGetSubmitterTemplate(t *testing.T) {
Expand Down
5 changes: 2 additions & 3 deletions ray-operator/controllers/ray/rayservice_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,7 @@ func (r *RayServiceReconciler) reconcileRayCluster(ctx context.Context, rayServi
// cleanUpRayClusterInstance cleans up all the dangling RayCluster instances that are owned by the RayService instance.
func (r *RayServiceReconciler) cleanUpRayClusterInstance(ctx context.Context, rayServiceInstance *rayv1.RayService) error {
rayClusterList := rayv1.RayClusterList{}
filterLabels := client.MatchingLabels{utils.RayServiceLabelKey: rayServiceInstance.Name}
filterLabels := client.MatchingLabels{utils.RayOriginatedFromLabelKey: utils.RayOriginatedFromLabelValue(utils.RayServiceCRD, rayServiceInstance.Name)}
var err error
if err = r.List(ctx, &rayClusterList, client.InNamespace(rayServiceInstance.Namespace), filterLabels); err != nil {
r.Log.Error(err, "Fail to list RayCluster for "+rayServiceInstance.Name)
Expand Down Expand Up @@ -671,8 +671,7 @@ func (r *RayServiceReconciler) constructRayClusterForRayService(rayService *rayv
for k, v := range rayService.Labels {
rayClusterLabel[k] = v
}
rayClusterLabel[utils.RayServiceLabelKey] = rayService.Name
rayClusterLabel[utils.KubernetesCreatedByLabelKey] = utils.RayServiceCreatorLabelValue
rayClusterLabel[utils.RayOriginatedFromLabelKey] = utils.RayOriginatedFromLabelValue(utils.RayServiceCRD, rayService.Name)

rayClusterAnnotations := make(map[string]string)
for k, v := range rayService.Annotations {
Expand Down
21 changes: 17 additions & 4 deletions ray-operator/controllers/ray/utils/constant.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,16 @@ const (
// Default application name
DefaultServeAppName = "default"
// Belows used as label key
RayServiceLabelKey = "ray.io/service"

// RayOriginatedFromLabelKey is the label used to associate the root KubeRay Custom Resource.
// For example, if a RayCluster is created by a RayJob named myjob, then the cluster will have
// A ray.io/originated-from=RayService_mysvc label will be added to the following resources if they are originated from a RayService mysvc.
// * Kubernetes Services
// * RayClusters
// A ray.io/originated-from=RayJob_myjob label will be added to the following resources if they are originated from a RayJob myjob.
// * Kubernetes Jobs
// * RayClusters
RayOriginatedFromLabelKey = "ray.io/originated-from"
RayClusterLabelKey = "ray.io/cluster"
RayNodeTypeLabelKey = "ray.io/node-type"
RayNodeGroupLabelKey = "ray.io/group"
Expand Down Expand Up @@ -74,9 +83,6 @@ const (
// The default name for kuberay operator
ComponentName = "kuberay-operator"

// The defaule RayService Identifier.
RayServiceCreatorLabelValue = "rayservice"

// Use as container env variable
RAY_CLUSTER_NAME = "RAY_CLUSTER_NAME"
RAY_IP = "RAY_IP"
Expand Down Expand Up @@ -154,3 +160,10 @@ const (
HeadService ServiceType = "headService"
ServingService ServiceType = "serveService"
)

// RayOriginatedFromLabelValue generates a value for the label RayOriginatedFromLabelKey
// This is also the only function to construct label filter of resources originated from a given CRDType.
func RayOriginatedFromLabelValue(kind CRDType, name string) string {
// we choose the _ as the separator because it is the only choice. ref: https://github.com/ray-project/kuberay/pull/1830#discussion_r1452547074
return string(kind) + "_" + name
}
2 changes: 1 addition & 1 deletion tests/framework/prototype.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,7 @@ def wait(self):
namespace = self.namespace, label_selector='ray.io/node-type=worker')
head_services = k8s_v1_api.list_namespaced_service(
namespace = self.namespace, label_selector =
f"ray.io/serve={self.custom_resource_object['metadata']['name']}-serve")
f"ray.io/originated-from=RayService_{self.custom_resource_object['metadata']['name']}-serve")
if (len(head_services.items) == 1 and len(headpods.items) == expected_head_pods
and len(workerpods.items) == expected_worker_pods
and check_pod_running(headpods.items) and check_pod_running(workerpods.items)):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_sample_rayservice_yamls.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def wait(self):
namespace = self.namespace, label_selector='ray.io/node-type=worker')
serve_services = k8s_v1_api.list_namespaced_service(
namespace = self.namespace, label_selector =
f"ray.io/serve={self.custom_resource_object['metadata']['name']}-serve")
f"ray.io/originated-from=RayService_{self.custom_resource_object['metadata']['name']}-serve")

if (len(serve_services.items) == 1 and len(headpods.items) == expected_head_pods
and len(workerpods.items) == expected_worker_pods
Expand Down

0 comments on commit f05fa2e

Please sign in to comment.