Skip to content

Commit f34697a

Browse files
authored
cluster status safeguards (#254)
1 parent a285ef6 commit f34697a

File tree

2 files changed

+22
-7
lines changed

2 files changed

+22
-7
lines changed

src/codeflare_sdk/cluster/cluster.py

+18 -5
Original file line number | Diff line number | Diff line change
@@ -176,9 +176,15 @@ def status(
176176
ready = False
177177
status = CodeFlareClusterStatus.FAILED # should deleted be separate
178178
return status, ready # exit early, no need to check ray status
179-
elif appwrapper.status in [AppWrapperStatus.PENDING]:
179+
elif appwrapper.status in [
180+
AppWrapperStatus.PENDING,
181+
AppWrapperStatus.QUEUEING,
182+
]:
180183
ready = False
181-
status = CodeFlareClusterStatus.QUEUED
184+
if appwrapper.status == AppWrapperStatus.PENDING:
185+
status = CodeFlareClusterStatus.QUEUED
186+
else:
187+
status = CodeFlareClusterStatus.QUEUEING
182188
if print_to_console:
183189
pretty_print.print_app_wrappers_status([appwrapper])
184190
return (
@@ -561,11 +567,18 @@ def _map_to_ray_cluster(rc) -> Optional[RayCluster]:
561567

562568

563569
def _map_to_app_wrapper(aw) -> AppWrapper:
570+
if "status" in aw and "canrun" in aw["status"]:
571+
return AppWrapper(
572+
name=aw["metadata"]["name"],
573+
status=AppWrapperStatus(aw["status"]["state"].lower()),
574+
can_run=aw["status"]["canrun"],
575+
job_state=aw["status"]["queuejobstate"],
576+
)
564577
return AppWrapper(
565578
name=aw["metadata"]["name"],
566-
status=AppWrapperStatus(aw["status"]["state"].lower()),
567-
can_run=aw["status"]["canrun"],
568-
job_state=aw["status"]["queuejobstate"],
579+
status=AppWrapperStatus("queueing"),
580+
can_run=False,
581+
job_state="Still adding to queue",
569582
)
570583

571584

src/codeflare_sdk/cluster/model.py

+4 -2
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,7 @@ class AppWrapperStatus(Enum):
3939
Defines the possible reportable states of an AppWrapper.
4040
"""
4141

42+
QUEUEING = "queueing"
4243
PENDING = "pending"
4344
RUNNING = "running"
4445
FAILED = "failed"
@@ -55,8 +56,9 @@ class CodeFlareClusterStatus(Enum):
5556
READY = 1
5657
STARTING = 2
5758
QUEUED = 3
58-
FAILED = 4
59-
UNKNOWN = 5
59+
QUEUEING = 4
60+
FAILED = 5
61+
UNKNOWN = 6
6062

6163

6264
@dataclass

0 commit comments

Comments
 (0)