@@ -48,6 +48,7 @@ import java.net.URI
48
48
import java.time.Duration
49
49
import java.time.LocalDateTime
50
50
import java.time.format.DateTimeFormatter
51
+ import java.util.concurrent.TimeUnit
51
52
import java.util.concurrent.TimeoutException
52
53
import kotlin.coroutines.resume
53
54
import kotlin.coroutines.resumeWithException
@@ -227,16 +228,9 @@ class CoderRemoteConnectionHandle {
227
228
228
229
// Wait for the IDE to come up.
229
230
indicator.text = " Waiting for ${workspace.ideName} backend..."
230
- var status: UnattendedHostStatus ? = null
231
231
val remoteProjectPath = accessor.makeRemotePath(ShellArgument .PlainText (workspace.projectPath))
232
232
val logsDir = accessor.getLogsDir(workspace.ideProduct.productCode, remoteProjectPath)
233
- while (lifetime.status == LifetimeStatus .Alive ) {
234
- status = ensureIDEBackend(accessor, workspace, ideDir, remoteProjectPath, logsDir, lifetime, null )
235
- if (! status?.joinLink.isNullOrBlank()) {
236
- break
237
- }
238
- delay(5000 )
239
- }
233
+ var status = ensureIDEBackend(accessor, workspace, ideDir, remoteProjectPath, logsDir, lifetime, null )
240
234
241
235
// We wait for non-null, so this only happens on cancellation.
242
236
val joinLink = status?.joinLink
@@ -302,6 +296,7 @@ class CoderRemoteConnectionHandle {
302
296
}
303
297
// Continue once the client is present.
304
298
handle.onClientPresenceChanged.advise(lifetime) {
299
+ logger.info(" ${workspace.ideName} client to ${workspace.hostname} presence: ${handle.clientPresent} " )
305
300
if (handle.clientPresent && continuation.isActive) {
306
301
continuation.resume(true )
307
302
}
@@ -437,8 +432,8 @@ class CoderRemoteConnectionHandle {
437
432
}
438
433
439
434
/* *
440
- * Ensure the backend is started. Status and/or links may be null if the
441
- * backend has not started .
435
+ * Ensure the backend is started. It will not return until a join link is
436
+ * received or the lifetime expires .
442
437
*/
443
438
private suspend fun ensureIDEBackend (
444
439
accessor : HighLevelHostAccessor ,
@@ -449,41 +444,67 @@ class CoderRemoteConnectionHandle {
449
444
lifetime : LifetimeDefinition ,
450
445
currentStatus : UnattendedHostStatus ? ,
451
446
): UnattendedHostStatus ? {
452
- val details = " ${workspace.hostname} :${ideDir.toRawString()} , project=${remoteProjectPath.toRawString()} "
453
- return try {
454
- if (currentStatus?.appPid != null &&
455
- ! currentStatus.joinLink.isNullOrBlank() &&
456
- accessor.isPidAlive(currentStatus.appPid.toInt())
457
- ) {
458
- // If the PID is alive, assume the join link we have is still
459
- // valid. The join link seems to change even if it is the same
460
- // backend running, so if we always fetched the link the client
461
- // would relaunch over and over.
447
+ val details = " ${workspace.hostname} :${ideDir.toRawString()} , project=${remoteProjectPath.toRawString()} "
448
+
449
+ // Check if the current IDE is alive.
450
+ if (currentStatus != null ) {
451
+ val isAlive = try {
452
+ val isAlive = accessor.isPidAlive(currentStatus.appPid.toInt())
453
+ logger.info(" ${workspace.ideName} status: pid=${currentStatus.appPid} , alive=$isAlive " )
454
+ isAlive
455
+ } catch (ex: Exception ) {
456
+ logger.info(" Failed to check if ${workspace.ideName} is alive on $details : pid=${currentStatus.appPid} " , ex)
457
+ false
458
+ }
459
+ if (isAlive) {
460
+ // Use the current status and join link.
462
461
return currentStatus
462
+ } else {
463
+ logger.info(" Relaunching ${workspace.ideName} since it is not alive..." )
463
464
}
465
+ } else {
466
+ logger.info(" Launching ${workspace.ideName} for the first time on ${workspace.hostname} ..." )
467
+ }
464
468
465
- // See if there is already a backend running. Weirdly, there is
466
- // always a PID, even if there is no backend running, and
467
- // backendUnresponsive is always false, but the links are null so
468
- // hopefully that is an accurate indicator that the IDE is up.
469
- val status = accessor.getHostIdeStatus(ideDir, remoteProjectPath)
470
- if (! status.joinLink.isNullOrBlank()) {
471
- logger.info(" Found existing ${workspace.ideName} backend on $details " )
472
- return status
473
- }
469
+ // If the PID is not alive, spawn a new backend. This may not be
470
+ // idempotent, so only call if we are really sure we need to.
471
+ accessor.startHostIdeInBackgroundAndDetach(lifetime, ideDir, remoteProjectPath, logsDir)
474
472
475
- // Otherwise, spawn a new backend. This does not seem to spawn a
476
- // second backend if one is already running, yet it does somehow
477
- // cause a second client to launch. So only run this if we are
478
- // really sure we have to launch a new backend.
479
- logger.info(" Starting ${workspace.ideName} backend on $details " )
480
- accessor.startHostIdeInBackgroundAndDetach(lifetime, ideDir, remoteProjectPath, logsDir)
481
- // Get the newly spawned PID and join link.
482
- return accessor.getHostIdeStatus(ideDir, remoteProjectPath)
483
- } catch (ex: Exception ) {
484
- logger.info(" Failed to get ${workspace.ideName} status from $details " , ex)
485
- currentStatus
473
+ // Get the newly spawned PID and join link.
474
+ var attempts = 0
475
+ val maxAttempts = 6
476
+ val wait = TimeUnit .SECONDS .toMillis(5 )
477
+ while (lifetime.status == LifetimeStatus .Alive ) {
478
+ try {
479
+ attempts++
480
+ val status = accessor.getHostIdeStatus(ideDir, remoteProjectPath)
481
+ if (! status.joinLink.isNullOrBlank()) {
482
+ logger.info(" Found join link for ${workspace.ideName} ; proceeding to connect: pid=${status.appPid} " )
483
+ return status
484
+ }
485
+ // If we did not get a join link, see if the IDE is alive in
486
+ // case it died and we need to respawn.
487
+ val isAlive = status.appPid > 0 && accessor.isPidAlive(status.appPid.toInt())
488
+ logger.info(" ${workspace.ideName} status: pid=${status.appPid} , alive=$isAlive , unresponsive=${status.backendUnresponsive} , attempt=$attempts " )
489
+ // It is not clear whether the PID can be trusted because we get
490
+ // one even when there is no backend at all. For now give it
491
+ // some time and if it is still dead, only then try to respawn.
492
+ if (! isAlive && attempts >= maxAttempts) {
493
+ logger.info(" ${workspace.ideName} is still not alive after $attempts checks, respawning backend and waiting $wait ms to try again" )
494
+ accessor.startHostIdeInBackgroundAndDetach(lifetime, ideDir, remoteProjectPath, logsDir)
495
+ attempts = 0
496
+ } else {
497
+ logger.info(" No join link found in status; waiting $wait ms to try again" )
498
+ }
499
+ } catch (ex: Exception ) {
500
+ logger.info(" Failed to get ${workspace.ideName} status from $details ; waiting $wait ms to try again" , ex)
501
+ }
502
+ delay(wait)
486
503
}
504
+
505
+ // This means the lifetime is no longer alive.
506
+ logger.info(" Connection to ${workspace.ideName} on $details aborted by user" )
507
+ return null
487
508
}
488
509
489
510
companion object {
0 commit comments