44
44
from supervisor import events
45
45
from supervisor .states import SupervisorStates
46
46
from supervisor .states import getProcessStateDescription
47
+ from supervisor .graphutils import Graph
48
+
49
+ from supervisor .states import ProcessStates
47
50
48
51
class Supervisor :
49
52
stopping = False # set after we detect that we are handling a stop request
@@ -55,6 +58,8 @@ def __init__(self, options):
55
58
self .options = options
56
59
self .process_groups = {}
57
60
self .ticks = {}
61
+ self .process_spawn_dict = dict ()
62
+ self .process_started_dict = dict ()
58
63
59
64
def main (self ):
60
65
if not self .options .first :
@@ -84,6 +89,29 @@ def run(self):
84
89
try :
85
90
for config in self .options .process_group_configs :
86
91
self .add_process_group (config )
92
+ # add processes to directed graph, to check for dependency cycles
93
+ g = Graph (len (self .options .process_group_configs ))
94
+ # replace depends_on string with actual process object
95
+ for config in (self .options .process_group_configs ):
96
+ # check dependencies for all programs in group:
97
+ for conf in enumerate (config .process_configs ):
98
+ if config .process_configs [conf [0 ]].depends_on is not None :
99
+ process_dict = dict ({})
100
+ # split to get all processes in case there are multiple dependencies
101
+ dependent_processes = (config .process_configs [conf [0 ]].depends_on ).split ()
102
+ for process in dependent_processes :
103
+ # this can be of form group:process or simply process
104
+ try :
105
+ dependent_group , dependent_process = process .split (":" )
106
+ except :
107
+ dependent_group = dependent_process = process
108
+ g .addEdge (config .process_configs [conf [0 ]].name , dependent_process )
109
+ process_dict [dependent_process ] = self .process_groups [dependent_group ].processes [dependent_process ]
110
+ config .process_configs [conf [0 ]].depends_on = process_dict
111
+ # check for cyclical process dependencies
112
+ if g .cyclic () == 1 :
113
+ raise AttributeError ('Process config contains dependeny cycle(s)! Check config files again!' )
114
+
87
115
self .options .openhttpservers (self )
88
116
self .options .setsignals ()
89
117
if (not self .options .nodaemon ) and self .options .first :
@@ -239,7 +267,10 @@ def runforever(self):
239
267
combined_map [fd ].handle_error ()
240
268
241
269
for group in pgroups :
242
- group .transition ()
270
+ group .transition (self )
271
+
272
+ self ._spawn_dependee_queue ()
273
+ self ._handle_spawn_timeout ()
243
274
244
275
self .reap ()
245
276
self .handle_signal ()
@@ -316,6 +347,88 @@ def handle_signal(self):
316
347
def get_state(self):
    """
    Return the supervisor's current mood, read from ``self.options.mood``.
    """
    options = self.options
    return options.mood
318
349
350
def _spawn_dependee_queue(self):
    """
    Spawn every queued process whose dependencies are satisfied.

    Walks a snapshot of ``self.process_spawn_dict``:

    * a process whose config has ``depends_on`` set to None is spawned
      right away;
    * a process with ``depends_on`` is spawned once every process listed
      there is in the RUNNING state;
    * if any process listed in ``depends_on`` is FATAL, the whole queue is
      failed via ``_set_fatal_state_and_empty_queue`` and the pass stops.

    Processes whose dependencies are neither all RUNNING nor FATAL stay in
    the queue for a later pass.
    """
    # Iterate over a copy: spawning removes entries from process_spawn_dict.
    for process_name, process_object in list(self.process_spawn_dict.items()):
        depends_on = process_object.config.depends_on
        if depends_on is None:
            self._spawn_process_from_process_dict(process_name, process_object)
            continue

        # Compare state codes by value. Identity (`is`) only happens to work
        # when the codes are small ints cached by the interpreter.
        states = [dependency.state for dependency in depends_on.values()]
        if any(state == ProcessStates.FATAL for state in states):
            self._set_fatal_state_and_empty_queue()
            # The queue has just been replaced; nothing left to examine.
            break
        if all(state == ProcessStates.RUNNING for state in states):
            self._spawn_process_from_process_dict(process_name, process_object)
368
+
369
def _spawn_process_from_process_dict(self, process_name, process_object):
    """
    Move a process from the spawn queue to the started set and spawn it.

    The entry is recorded in ``self.process_started_dict`` and removed from
    ``self.process_spawn_dict``. The process is spawned (and its
    ``notify_timer`` reset to 5) only when it is neither STARTING nor
    RUNNING already.

    Raises KeyError if ``process_name`` is not in ``self.process_spawn_dict``.
    """
    self.process_started_dict[process_name] = process_object
    del self.process_spawn_dict[process_name]
    # Only spawn if the process is not running yet (it could have been
    # started in the meantime). State codes are compared by value, not by
    # object identity.
    if process_object.state not in (ProcessStates.STARTING,
                                    ProcessStates.RUNNING):
        process_object.spawn(self)
        # First "still waiting" notice is due 5 seconds after laststart.
        process_object.notify_timer = 5
377
+
378
def _set_fatal_state_and_empty_queue(self):
    """
    Fail every process still waiting in the spawn queue, then clear it.

    Each queued process gets a spawn error recorded and is moved to the
    FATAL state; afterwards ``self.process_spawn_dict`` is replaced by an
    empty dict.
    """
    for queued_name in self.process_spawn_dict:
        waiting_process = self.process_spawn_dict[queued_name]
        error_text = (
            'Dependee process did not start - set FATAL state for {}'
            .format(queued_name))
        waiting_process.record_spawnerr(error_text)
        waiting_process.change_state(ProcessStates.FATAL)
    self.process_spawn_dict = dict()
    # NOTE(review): process_spawn_set is not initialised or read anywhere in
    # the visible code -- possibly a leftover from an earlier set-based
    # queue. Kept as-is; confirm before removing.
    self.process_spawn_set = set()
386
+
387
def _handle_spawn_timeout(self):
    """
    Supervise processes that were spawned from the dependency queue.

    For every entry in a snapshot of ``self.process_started_dict``:

    * RUNNING processes are dropped from the dict;
    * a process whose time since ``laststart`` has reached its config's
      ``spawn_timeout`` is handed to ``_timeout_process``;
    * otherwise, once the time since ``laststart`` reaches the process's
      ``notify_timer``, ``_notfiy_user_about_waiting`` is called (the
      timer starts at 5 and is advanced in steps of 5 seconds).
    """
    # Iterate over a copy: entries are removed while looping, and
    # _timeout_process() replaces process_started_dict altogether.
    for process_name, process_object in list(self.process_started_dict.items()):
        if process_object.state == ProcessStates.RUNNING:
            # pop() instead of del: if an earlier entry in this pass timed
            # out, the dict has been reset and the key is already gone.
            self.process_started_dict.pop(process_name, None)
            continue

        elapsed = time.time() - process_object.laststart
        if elapsed >= process_object.config.spawn_timeout:
            # handle timeout error.
            self._timeout_process(process_name, process_object)
        elif elapsed >= process_object.notify_timer:
            # notify user about waiting
            self._notfiy_user_about_waiting(process_name, process_object)
404
+
405
def _timeout_process(self, process_name, process_object):
    """
    Fail a process that did not reach RUNNING in time, plus everything queued.

    Logs a warning naming the timed-out process and the processes still in
    ``self.process_spawn_dict``, records a spawn error on the timed-out
    process and moves it to FATAL. Every process still queued is likewise
    given a spawn error and moved to FATAL. Finally both
    ``self.process_spawn_dict`` and ``self.process_started_dict`` are
    replaced by empty dicts.
    """
    state_description = getProcessStateDescription(process_object.state)
    timeout_seconds = process_object.config.spawn_timeout
    not_spawned = list(self.process_spawn_dict)
    msg = ("timeout: dependee process {} in {} did not reach RUNNING within {} seconds, dependees {} are not spawned"
           .format(process_name, state_description, timeout_seconds, not_spawned))
    process_object.config.options.logger.warn(msg)

    timeout_error = (
        'timeout: Process {} did not reach RUNNING state within {} seconds'
        .format(process_name, process_object.config.spawn_timeout))
    process_object.record_spawnerr(timeout_error)
    process_object.change_state(ProcessStates.FATAL)

    # Nothing that is still queued can start any more: fail it all.
    for queued_name, queued_process in self.process_spawn_dict.items():
        queued_error = (
            'Dependee process did not start - set FATAL state for {}'
            .format(queued_name))
        queued_process.record_spawnerr(queued_error)
        queued_process.change_state(ProcessStates.FATAL)

    self.process_spawn_dict = dict()
    self.process_started_dict = dict()
424
+
425
def _notfiy_user_about_waiting(self, process_name, process_object):
    """
    Log that a started process is not RUNNING yet and defer the next notice.

    Advances ``process_object.notify_timer`` by 5 and writes an info-level
    message, containing the process name and its current state description,
    to the logger found at ``process_object.config.options.logger``.
    """
    # Push the threshold for the next notice out by another 5 seconds.
    process_object.notify_timer = process_object.notify_timer + 5
    current_state = getProcessStateDescription(process_object.state)
    logger = process_object.config.options.logger
    logger.info(
        "waiting for dependee process {} in {} state to be RUNNING"
        .format(process_name, current_state))
431
+
319
432
def timeslice(period, when):
    """
    Round ``when`` down to the start of its ``period``-sized slice.

    Subtracts ``when % period`` from ``when`` and truncates the result to
    an int; for a positive ``period`` this is the largest multiple of
    ``period`` that does not exceed ``when``.
    """
    offset_into_slice = when % period
    slice_start = when - offset_into_slice
    return int(slice_start)
321
434
0 commit comments