Skip to content

Commit f2fbfd2

Browse files
committed
feat: roberta self-test
This PR adds both one-off and periodic self-test capability that runs RoBERTa on 1 gpu as a Kubernetes Job/CronJob. No client-side support is needed, except for the one-time creation of these resources. also: make sure the self-test yamls reflect the latest image versions
1 parent e59bee2 commit f2fbfd2

File tree

14 files changed

+370
-4
lines changed

14 files changed

+370
-4
lines changed

.dockerignore

+2-1
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
store/**/*.md
1+
store/**/*.md
2+
*~

.github/workflows/self-test.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -41,4 +41,4 @@ jobs:
4141
TERM: xterm
4242
DEBUG_KUBERNETES: true
4343
TEST_LOG_AGGREGATOR: true
44-
run: ./deploy/self-test/run.sh
44+
run: ./tests/self-test/run.sh

deploy/self-test/.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
github.yaml
+80
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
apiVersion: v1
2+
kind: ServiceAccount
3+
metadata:
4+
name: codeflare-self-test-serviceaccount
5+
---
6+
kind: Role
7+
apiVersion: rbac.authorization.k8s.io/v1
8+
metadata:
9+
name: codeflare-self-test-role
10+
rules:
11+
- apiGroups: [""]
12+
resources: ["pods", "pods/exec", "services", "events", "secrets"]
13+
verbs: ["create", "delete", "get", "watch", "list"]
14+
- apiGroups: ["apps"]
15+
resources: ["deployments", "replicasets"]
16+
verbs: ["create", "delete", "get", "watch", "list"]
17+
- apiGroups: ["batch"]
18+
resources: ["jobs"]
19+
verbs: ["create", "get", "watch", "list"]
20+
- apiGroups: [""]
21+
resources: ["pods/exec", "pods/portforward"]
22+
verbs: ["create", "delete"]
23+
#- apiGroups: ["apps"]
24+
# resources: [deployments]
25+
# verbs: [get, list]
26+
---
27+
apiVersion: rbac.authorization.k8s.io/v1
28+
kind: RoleBinding
29+
metadata:
30+
name: codeflare-self-test-rolebinding
31+
subjects:
32+
- kind: ServiceAccount
33+
name: codeflare-self-test-serviceaccount
34+
roleRef:
35+
kind: Role
36+
name: codeflare-self-test-role
37+
apiGroup: rbac.authorization.k8s.io
38+
39+
---
40+
apiVersion: batch/v1
41+
kind: CronJob
42+
metadata:
43+
name: codeflare-self-test-roberta-1gpu-periodic
44+
spec:
45+
schedule: "0/30 * * * *" # every 30 minutes, starting from the top of the hour (see crontab.guru)
46+
concurrencyPolicy: Forbid
47+
failedJobsHistoryLimit: 1000
48+
successfulJobsHistoryLimit: 1000
49+
jobTemplate:
50+
spec:
51+
template:
52+
spec:
53+
serviceAccountName: codeflare-self-test-serviceaccount
54+
containers:
55+
- name: self-test
56+
image: ghcr.io/project-codeflare/codeflare-self-test:0.10.4
57+
env:
58+
# - name: GUIDEBOOK_RUN_ARGS
59+
# value: "-V"
60+
- name: VARIANTS
61+
value: roberta-1gpu
62+
- name: ML_CODEFLARE_ROBERTA_GITHUB_USER
63+
valueFrom:
64+
secretKeyRef:
65+
name: github
66+
key: GITHUB_USER
67+
- name: ML_CODEFLARE_ROBERTA_GITHUB_TOKEN
68+
valueFrom:
69+
secretKeyRef:
70+
name: github
71+
key: GITHUB_TOKEN
72+
- name: MODE
73+
value: development # otherwise building codeflare-cli takes a huge amount of memory
74+
- name: KUBE_CONTEXT_FOR_TEST
75+
value: kind-codeflare-test # must match with tests/kind/profiles/...
76+
- name: KUBE_NS_FOR_TEST
77+
value: default # must match with tests/kind/profiles/...
78+
- name: CODEFLARE_NAMESPACE_RESTRICTED # restrict use of cluster-scoped resources
79+
value: "true"
80+
restartPolicy: Never
+77
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
apiVersion: v1
2+
kind: ServiceAccount
3+
metadata:
4+
name: codeflare-self-test-serviceaccount
5+
---
6+
kind: Role
7+
apiVersion: rbac.authorization.k8s.io/v1
8+
metadata:
9+
name: codeflare-self-test-role
10+
rules:
11+
- apiGroups: [""]
12+
resources: ["pods", "pods/exec", "services", "events", "secrets"]
13+
verbs: ["create", "delete", "get", "watch", "list"]
14+
- apiGroups: ["apps"]
15+
resources: ["deployments", "replicasets"]
16+
verbs: ["create", "delete", "get", "watch", "list"]
17+
- apiGroups: ["batch"]
18+
resources: ["jobs"]
19+
verbs: ["create", "get", "watch", "list"]
20+
- apiGroups: [""]
21+
resources: ["pods/exec", "pods/portforward"]
22+
verbs: ["create", "delete"]
23+
#- apiGroups: ["apps"]
24+
# resources: [deployments]
25+
# verbs: [get, list]
26+
---
27+
apiVersion: rbac.authorization.k8s.io/v1
28+
kind: RoleBinding
29+
metadata:
30+
name: codeflare-self-test-rolebinding
31+
subjects:
32+
- kind: ServiceAccount
33+
name: codeflare-self-test-serviceaccount
34+
roleRef:
35+
kind: Role
36+
name: codeflare-self-test-role
37+
apiGroup: rbac.authorization.k8s.io
38+
39+
---
40+
apiVersion: batch/v1
41+
kind: Job
42+
metadata:
43+
name: codeflare-self-test-roberta-1gpu-once
44+
spec:
45+
completions: 1
46+
parallelism: 1
47+
template:
48+
spec:
49+
serviceAccountName: codeflare-self-test-serviceaccount
50+
containers:
51+
- name: self-test
52+
image: ghcr.io/project-codeflare/codeflare-self-test:0.10.4
53+
env:
54+
# - name: GUIDEBOOK_RUN_ARGS
55+
# value: "-V"
56+
- name: VARIANTS
57+
value: roberta-1gpu
58+
- name: ML_CODEFLARE_ROBERTA_GITHUB_USER
59+
valueFrom:
60+
secretKeyRef:
61+
name: github
62+
key: GITHUB_USER
63+
- name: ML_CODEFLARE_ROBERTA_GITHUB_TOKEN
64+
valueFrom:
65+
secretKeyRef:
66+
name: github
67+
key: GITHUB_TOKEN
68+
- name: MODE
69+
value: development # otherwise building codeflare-cli takes a huge amount of memory
70+
- name: KUBE_CONTEXT_FOR_TEST
71+
value: kind-codeflare-test # must match with tests/kind/profiles/...
72+
- name: KUBE_NS_FOR_TEST
73+
value: default # must match with tests/kind/profiles/...
74+
- name: CODEFLARE_NAMESPACE_RESTRICTED # restrict use of cluster-scoped resources
75+
value: "true"
76+
restartPolicy: Never
77+
backoffLimit: 1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
apiVersion: v1
2+
kind: ServiceAccount
3+
metadata:
4+
name: codeflare-self-test-serviceaccount
5+
---
6+
kind: Role
7+
apiVersion: rbac.authorization.k8s.io/v1
8+
metadata:
9+
name: codeflare-self-test-role
10+
rules:
11+
- apiGroups: [""]
12+
resources: ["pods", "pods/exec", "services", "events", "secrets"]
13+
verbs: ["create", "delete", "get", "watch", "list"]
14+
- apiGroups: ["apps"]
15+
resources: ["deployments", "replicasets"]
16+
verbs: ["create", "delete", "get", "watch", "list"]
17+
- apiGroups: ["batch"]
18+
resources: ["jobs"]
19+
verbs: ["create", "get", "watch", "list"]
20+
- apiGroups: [""]
21+
resources: ["pods/exec", "pods/portforward"]
22+
verbs: ["create", "delete"]
23+
#- apiGroups: ["apps"]
24+
# resources: [deployments]
25+
# verbs: [get, list]
26+
---
27+
apiVersion: rbac.authorization.k8s.io/v1
28+
kind: RoleBinding
29+
metadata:
30+
name: codeflare-self-test-rolebinding
31+
subjects:
32+
- kind: ServiceAccount
33+
name: codeflare-self-test-serviceaccount
34+
roleRef:
35+
kind: Role
36+
name: codeflare-self-test-role
37+
apiGroup: rbac.authorization.k8s.io
38+
39+
---
40+
apiVersion: batch/v1
41+
kind: CronJob
42+
metadata:
43+
name: codeflare-self-test-roberta-1gpu
44+
spec:
45+
schedule: "*/30 * * * *" # every 30 minutes (see crontab.guru)
46+
jobTemplate:
47+
spec:
48+
template:
49+
spec:
50+
serviceAccountName: codeflare-self-test-serviceaccount
51+
containers:
52+
- name: self-test
53+
image: ghcr.io/project-codeflare/codeflare-self-test:0.10.4
54+
env:
55+
# - name: GUIDEBOOK_RUN_ARGS
56+
# value: "-V"
57+
- name: VARIANTS
58+
value: roberta-1gpu
59+
- name: ML_CODEFLARE_ROBERTA_GITHUB_USER
60+
valueFrom:
61+
secretKeyRef:
62+
name: github
63+
key: GITHUB_USER
64+
- name: ML_CODEFLARE_ROBERTA_GITHUB_TOKEN
65+
valueFrom:
66+
secretKeyRef:
67+
name: github
68+
key: GITHUB_TOKEN
69+
- name: MODE
70+
value: development # otherwise building codeflare-cli takes a huge amount of memory
71+
- name: KUBE_CONTEXT_FOR_TEST
72+
value: kind-codeflare-test # must match with tests/kind/profiles/...
73+
- name: KUBE_NS_FOR_TEST
74+
value: default # must match with tests/kind/profiles/...
75+
- name: CODEFLARE_NAMESPACE_RESTRICTED # restrict use of cluster-scoped resources
76+
value: "true"
77+
restartPolicy: Never

package.json

+2
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,8 @@
124124
"infile": "CHANGELOG.md"
125125
},
126126
"@release-it/bumper": {
127+
"out": ["deploy/self-test/self-test.yaml",
128+
"deploy/self-test/self-test-roberta.yaml",
127129
"plugins/plugin-client-default/package.json"]
128130
}
129131
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
{
2+
"name": "keep-it-simple",
3+
"creationTime": 1660657756574,
4+
"lastModifiedTime": 1661642588298,
5+
"lastUsedTime": 1661643221215,
6+
"choices": {
7+
"madwizard/apriori/use-gpu": "don't use gpus",
8+
"madwizard/apriori/arch": "x64",
9+
"madwizard/apriori/platform": "darwin",
10+
"madwizard/apriori/mac-installer": "Homebrew",
11+
"madwizard/apriori/in-terminal": "HTML",
12+
"Start a new Run####Connect Dashboard to an existing Run####Boot up a Cloud Computer####Shut down a Cloud Computer": "Start a new Run",
13+
"Run with CodeFlare Model Architecture####Bring Your Own Code####Demos": "Run with CodeFlare Model Architecture",
14+
"Training Tasks####Fine Tuning Tasks": "Training Tasks",
15+
"Train a Masked Language Model": "Train a Masked Language Model",
16+
"I want to run a quick test with sample data####I have my own custom input data on S3": "I want to run a quick test with sample data",
17+
"AWS####IBM####My data is not stored in S3": "My data is not stored in S3",
18+
"Run Locally####Run on a Kubernetes Cluster": "Run on a Kubernetes Cluster",
19+
"My Cluster is Running Locally####My Cluster is Running on Kubernetes": "My Cluster is Running on Kubernetes",
20+
"expand((kubectl config get-contexts -o name | grep -E . >& /dev/null && kubectl config get-contexts -o name) || (kubectl version | grep Server >& /dev/null && echo \"${KUBE_CONTEXT_FOR_TEST-In-cluster}\" || exit 1), Kubernetes contexts)": "kind-codeflare-test",
21+
"expand([ -z ${KUBE_CONTEXT} ] && exit 1 || X=$([ -n \"$KUBE_NS_FOR_TEST\" ] && echo $KUBE_NS_FOR_TEST || kubectl ${KUBE_CONTEXT_ARG} get ns -o name || oc ${KUBE_CONTEXT_ARG} get projects -o name); echo \"$X\" | sed -E 's#(namespace|project.project.openshift.io)/##' | grep -Ev 'openshift|kube-', Kubernetes namespaces)####Create a namespace": "default",
22+
"Number of CPUs####Number of GPUs####Minimum Workers####Maximum Workers####Worker Memory####Head Memory": "{\"Number of CPUs\":\"1\",\"Number of GPUs\":\"1\",\"Minimum Workers\":\"1\",\"Maximum Workers\":\"1\",\"Worker Memory\":\"8Gi\",\"Head Memory\":\"8Gi\"}",
23+
"Keep It Simple####Use the Ray Autoscaler####Use the Multi-user Enhanced Kubernetes Scheduler": "Keep It Simple"
24+
}
25+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
{
2+
"name": "mcad-coscheduler",
3+
"creationTime": 1660657756574,
4+
"lastModifiedTime": 1660747919298,
5+
"lastUsedTime": 1660755725660,
6+
"choices": {
7+
"madwizard/apriori/use-gpu": "don't use gpus",
8+
"madwizard/apriori/arch": "x64",
9+
"madwizard/apriori/platform": "darwin",
10+
"madwizard/apriori/mac-installer": "Homebrew",
11+
"madwizard/apriori/in-terminal": "HTML",
12+
"Start a new Run####Connect Dashboard to an existing Run####Boot up a Cloud Computer####Shut down a Cloud Computer": "Start a new Run",
13+
"Run with CodeFlare Model Architecture####Bring Your Own Code####Demos": "Run with CodeFlare Model Architecture",
14+
"Training Tasks####Fine Tuning Tasks": "Training Tasks",
15+
"Train a Masked Language Model": "Train a Masked Language Model",
16+
"I want to run a quick test with sample data####I have my own custom input data on S3": "I want to run a quick test with sample data",
17+
"AWS####IBM####My data is not stored in S3": "My data is not stored in S3",
18+
"Run Locally####Run on a Kubernetes Cluster": "Run on a Kubernetes Cluster",
19+
"My Cluster is Running Locally####My Cluster is Running on Kubernetes": "My Cluster is Running on Kubernetes",
20+
"expand((kubectl config get-contexts -o name | grep -E . >& /dev/null && kubectl config get-contexts -o name) || (kubectl version | grep Server >& /dev/null && echo \"${KUBE_CONTEXT_FOR_TEST-In-cluster}\" || exit 1), Kubernetes contexts)": "kind-codeflare-test",
21+
"expand([ -z ${KUBE_CONTEXT} ] && exit 1 || X=$([ -n \"$KUBE_NS_FOR_TEST\" ] && echo $KUBE_NS_FOR_TEST || kubectl ${KUBE_CONTEXT_ARG} get ns -o name || oc ${KUBE_CONTEXT_ARG} get projects -o name); echo \"$X\" | sed -E 's#(namespace|project.project.openshift.io)/##' | grep -Ev 'openshift|kube-', Kubernetes namespaces)####Create a namespace": "default",
22+
"Number of CPUs####Number of GPUs####Minimum Workers####Maximum Workers####Worker Memory####Head Memory": "{\"Number of CPUs\":\"1\",\"Number of GPUs\":\"1\",\"Minimum Workers\":\"1\",\"Maximum Workers\":\"1\",\"Worker Memory\":\"8Gi\",\"Head Memory\":\"8Gi\"}",
23+
"Keep It Simple####Use the Ray Autoscaler####Use the Multi-user Enhanced Kubernetes Scheduler": "Use the Multi-user Enhanced Kubernetes Scheduler",
24+
"My administrator has already installed and configured MCAD####MCAD with the Advanced Coscheduler####MCAD with the Default Kubernetes Scheduler": "MCAD with the Advanced Coscheduler"
25+
}
26+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
{
2+
"name": "mcad-default",
3+
"creationTime": 1660657756574,
4+
"lastModifiedTime": 1660747919298,
5+
"lastUsedTime": 1660753306596,
6+
"choices": {
7+
"madwizard/apriori/use-gpu": "don't use gpus",
8+
"madwizard/apriori/arch": "x64",
9+
"madwizard/apriori/platform": "darwin",
10+
"madwizard/apriori/mac-installer": "Homebrew",
11+
"madwizard/apriori/in-terminal": "HTML",
12+
"Start a new Run####Connect Dashboard to an existing Run####Boot up a Cloud Computer####Shut down a Cloud Computer": "Start a new Run",
13+
"Run with CodeFlare Model Architecture####Bring Your Own Code####Demos": "Run with CodeFlare Model Architecture",
14+
"Training Tasks####Fine Tuning Tasks": "Training Tasks",
15+
"Train a Masked Language Model": "Train a Masked Language Model",
16+
"I want to run a quick test with sample data####I have my own custom input data on S3": "I want to run a quick test with sample data",
17+
"AWS####IBM####My data is not stored in S3": "My data is not stored in S3",
18+
"Run Locally####Run on a Kubernetes Cluster": "Run on a Kubernetes Cluster",
19+
"My Cluster is Running Locally####My Cluster is Running on Kubernetes": "My Cluster is Running on Kubernetes",
20+
"expand((kubectl config get-contexts -o name | grep -E . >& /dev/null && kubectl config get-contexts -o name) || (kubectl version | grep Server >& /dev/null && echo \"${KUBE_CONTEXT_FOR_TEST-In-cluster}\" || exit 1), Kubernetes contexts)": "kind-codeflare-test",
21+
"expand([ -z ${KUBE_CONTEXT} ] && exit 1 || X=$([ -n \"$KUBE_NS_FOR_TEST\" ] && echo $KUBE_NS_FOR_TEST || kubectl ${KUBE_CONTEXT_ARG} get ns -o name || oc ${KUBE_CONTEXT_ARG} get projects -o name); echo \"$X\" | sed -E 's#(namespace|project.project.openshift.io)/##' | grep -Ev 'openshift|kube-', Kubernetes namespaces)####Create a namespace": "default",
22+
"Number of CPUs####Number of GPUs####Minimum Workers####Maximum Workers####Worker Memory####Head Memory": "{\"Number of CPUs\":\"1\",\"Number of GPUs\":\"1\",\"Minimum Workers\":\"1\",\"Maximum Workers\":\"1\",\"Worker Memory\":\"8Gi\",\"Head Memory\":\"8Gi\"}",
23+
"Keep It Simple####Use the Ray Autoscaler####Use the Multi-user Enhanced Kubernetes Scheduler": "Use the Multi-user Enhanced Kubernetes Scheduler",
24+
"My administrator has already installed and configured MCAD####MCAD with the Advanced Coscheduler####MCAD with the Default Kubernetes Scheduler": "MCAD with the Default Kubernetes Scheduler"
25+
}
26+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
{
2+
"name": "mcad-preinstalled",
3+
"creationTime": 1660657756574,
4+
"lastModifiedTime": 1660747919298,
5+
"lastUsedTime": 1660832127576,
6+
"choices": {
7+
"madwizard/apriori/use-gpu": "don't use gpus",
8+
"madwizard/apriori/arch": "x64",
9+
"madwizard/apriori/platform": "darwin",
10+
"madwizard/apriori/mac-installer": "Homebrew",
11+
"madwizard/apriori/in-terminal": "HTML",
12+
"Start a new Run####Connect Dashboard to an existing Run####Boot up a Cloud Computer####Shut down a Cloud Computer": "Start a new Run",
13+
"Run with CodeFlare Model Architecture####Bring Your Own Code####Demos": "Run with CodeFlare Model Architecture",
14+
"Training Tasks####Fine Tuning Tasks": "Training Tasks",
15+
"Train a Masked Language Model": "Train a Masked Language Model",
16+
"I want to run a quick test with sample data####I have my own custom input data on S3": "I want to run a quick test with sample data",
17+
"AWS####IBM####My data is not stored in S3": "My data is not stored in S3",
18+
"Run Locally####Run on a Kubernetes Cluster": "Run on a Kubernetes Cluster",
19+
"My Cluster is Running Locally####My Cluster is Running on Kubernetes": "My Cluster is Running on Kubernetes",
20+
"expand((kubectl config get-contexts -o name | grep -E . >& /dev/null && kubectl config get-contexts -o name) || (kubectl version | grep Server >& /dev/null && echo \"${KUBE_CONTEXT_FOR_TEST-In-cluster}\" || exit 1), Kubernetes contexts)": "kind-codeflare-test",
21+
"expand([ -z ${KUBE_CONTEXT} ] && exit 1 || X=$([ -n \"$KUBE_NS_FOR_TEST\" ] && echo $KUBE_NS_FOR_TEST || kubectl ${KUBE_CONTEXT_ARG} get ns -o name || oc ${KUBE_CONTEXT_ARG} get projects -o name); echo \"$X\" | sed -E 's#(namespace|project.project.openshift.io)/##' | grep -Ev 'openshift|kube-', Kubernetes namespaces)####Create a namespace": "default",
22+
"Number of CPUs####Number of GPUs####Minimum Workers####Maximum Workers####Worker Memory####Head Memory": "{\"Number of CPUs\":\"1\",\"Number of GPUs\":\"1\",\"Minimum Workers\":\"1\",\"Maximum Workers\":\"1\",\"Worker Memory\":\"8Gi\",\"Head Memory\":\"8Gi\"}",
23+
"Keep It Simple####Use the Ray Autoscaler####Use the Multi-user Enhanced Kubernetes Scheduler": "Use the Multi-user Enhanced Kubernetes Scheduler",
24+
"My administrator has already installed and configured MCAD####MCAD with the Advanced Coscheduler####MCAD with the Default Kubernetes Scheduler": "My administrator has already installed and configured MCAD"
25+
}
26+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
{
2+
"name": "ray-autoscaler",
3+
"creationTime": 1660657756574,
4+
"lastModifiedTime": 1660675440396,
5+
"lastUsedTime": 1660743373674,
6+
"choices": {
7+
"madwizard/apriori/use-gpu": "don't use gpus",
8+
"madwizard/apriori/arch": "x64",
9+
"madwizard/apriori/platform": "darwin",
10+
"madwizard/apriori/mac-installer": "Homebrew",
11+
"madwizard/apriori/in-terminal": "HTML",
12+
"Start a new Run####Connect Dashboard to an existing Run####Boot up a Cloud Computer####Shut down a Cloud Computer": "Start a new Run",
13+
"Run with CodeFlare Model Architecture####Bring Your Own Code####Demos": "Run with CodeFlare Model Architecture",
14+
"Training Tasks####Fine Tuning Tasks": "Training Tasks",
15+
"Train a Masked Language Model": "Train a Masked Language Model",
16+
"I want to run a quick test with sample data####I have my own custom input data on S3": "I want to run a quick test with sample data",
17+
"AWS####IBM####My data is not stored in S3": "My data is not stored in S3",
18+
"Run Locally####Run on a Kubernetes Cluster": "Run on a Kubernetes Cluster",
19+
"My Cluster is Running Locally####My Cluster is Running on Kubernetes": "My Cluster is Running on Kubernetes",
20+
"expand((kubectl config get-contexts -o name | grep -E . >& /dev/null && kubectl config get-contexts -o name) || (kubectl version | grep Server >& /dev/null && echo \"${KUBE_CONTEXT_FOR_TEST-In-cluster}\" || exit 1), Kubernetes contexts)": "kind-codeflare-test",
21+
"expand([ -z ${KUBE_CONTEXT} ] && exit 1 || X=$([ -n \"$KUBE_NS_FOR_TEST\" ] && echo $KUBE_NS_FOR_TEST || kubectl ${KUBE_CONTEXT_ARG} get ns -o name || oc ${KUBE_CONTEXT_ARG} get projects -o name); echo \"$X\" | sed -E 's#(namespace|project.project.openshift.io)/##' | grep -Ev 'openshift|kube-', Kubernetes namespaces)####Create a namespace": "default",
22+
"Number of CPUs####Number of GPUs####Minimum Workers####Maximum Workers####Worker Memory####Head Memory": "{\"Number of CPUs\":\"1\",\"Number of GPUs\":\"1\",\"Minimum Workers\":\"1\",\"Maximum Workers\":\"1\",\"Worker Memory\":\"8Gi\",\"Head Memory\":\"8Gi\"}",
23+
"Keep It Simple####Use the Ray Autoscaler####Use the Multi-user Enhanced Kubernetes Scheduler": "Use the Ray Autoscaler"
24+
}
25+
}

0 commit comments

Comments
 (0)