Skip to content

Commit f33c2d0

Browse files
🐛 subprocess Make process supervisor more robust by handling errors and undefined commands (#563)
<!-- Copyright (C) 2020-2022 Arm Limited or its affiliates and Contributors. All rights reserved. SPDX-License-Identifier: Apache-2.0 --> ### Description <!-- Please add any detail or context that would be useful to a reviewer. --> Make process supervisor more robust by handling errors and undefined commands ### Test Coverage <!-- Please put an `x` in the correct box e.g. `[x]` to indicate the testing coverage of this change. --> - [x] This change is covered by existing or additional automated tests. - [ ] Manual testing has been performed (and evidence provided) as automated testing was not feasible. - [ ] Additional tests are not required for this change (e.g. documentation update).
1 parent 3cff089 commit f33c2d0

File tree

3 files changed

+141
-25
lines changed

3 files changed

+141
-25
lines changed

changes/20250212110908.bugfix

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
:bug: `subprocess` Make process supervisor more robust by handling errors and undefined commands

utils/subprocess/supervisor/supervisor.go

+24-9
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ import (
1515
// A Supervisor will run a command and automatically restart it if it exits. Hooks can be used to execute code at
1616
// different points in the execution lifecyle. Restarts can be delayed
1717
type Supervisor struct {
18-
newCommand func(ctx context.Context) *subprocess.Subprocess
18+
newCommand func(ctx context.Context) (*subprocess.Subprocess, error)
1919
preStart func(context.Context) error
2020
postStart func(context.Context) error
2121
postStop func(context.Context, error) error
@@ -27,7 +27,7 @@ type SupervisorOption func(*Supervisor)
2727

2828
// NewSupervisor will take a function 'newCommand' that creates a command and run it every time the command exits.
2929
// Options can be supplied by the 'opts' variadic argument that control the lifecyle of the supervisor
30-
func NewSupervisor(newCommand func(ctx context.Context) *subprocess.Subprocess, opts ...SupervisorOption) *Supervisor {
30+
func NewSupervisor(newCommand func(ctx context.Context) (*subprocess.Subprocess, error), opts ...SupervisorOption) *Supervisor {
3131
supervisor := &Supervisor{
3232
newCommand: newCommand,
3333
restartDelay: 0,
@@ -86,20 +86,33 @@ func (s *Supervisor) Run(ctx context.Context) (err error) {
8686
if s.preStart != nil {
8787
err = s.preStart(ctx)
8888
if err != nil {
89-
err = fmt.Errorf("%w: error running pre-start hook: %v", commonerrors.ErrUnexpected, err.Error())
90-
return
89+
if commonerrors.Any(err, commonerrors.ErrCancelled, commonerrors.ErrTimeout) {
90+
return err
91+
}
92+
return fmt.Errorf("%w: error running pre-start hook: %v", commonerrors.ErrUnexpected, err.Error())
9193
}
9294
}
9395

9496
g, _ := errgroup.WithContext(ctx)
95-
cmd := s.newCommand(ctx)
97+
cmd, err := s.newCommand(ctx)
98+
if err != nil {
99+
if commonerrors.Any(err, commonerrors.ErrCancelled, commonerrors.ErrTimeout) {
100+
return err
101+
}
102+
return fmt.Errorf("%w: error occurred when creating new command: %v", commonerrors.ErrUnexpected, err.Error())
103+
}
104+
if cmd == nil {
105+
return fmt.Errorf("%w: command was undefined", commonerrors.ErrUndefined)
106+
}
96107
g.Go(cmd.Execute)
97108

98109
if s.postStart != nil {
99110
err = s.postStart(ctx)
100111
if err != nil {
101-
err = fmt.Errorf("%w: error running post-start hook: %v", commonerrors.ErrUnexpected, err.Error())
102-
return err
112+
if commonerrors.Any(err, commonerrors.ErrCancelled, commonerrors.ErrTimeout) {
113+
return err
114+
}
115+
return fmt.Errorf("%w: error running post-start hook: %v", commonerrors.ErrUnexpected, err.Error())
103116
}
104117
}
105118

@@ -108,8 +121,10 @@ func (s *Supervisor) Run(ctx context.Context) (err error) {
108121
if s.postStop != nil {
109122
err = s.postStop(context.WithoutCancel(ctx), processErr)
110123
if err != nil {
111-
err = fmt.Errorf("%w: error running post-stop hook: %v", commonerrors.ErrUnexpected, err.Error())
112-
return err
124+
if commonerrors.Any(err, commonerrors.ErrCancelled, commonerrors.ErrTimeout) {
125+
return err
126+
}
127+
return fmt.Errorf("%w: error running post-stop hook: %v", commonerrors.ErrUnexpected, err.Error())
113128
}
114129
}
115130

utils/subprocess/supervisor/supervisor_test.go

+116-16
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package supervisor
22

33
import (
44
"context"
5+
"errors"
56
"fmt"
67
"path/filepath"
78
"testing"
@@ -38,8 +39,8 @@ func TestSupervisor(t *testing.T) {
3839
cmd, err := subprocess.New(ctx, logger, "starting", "success", "failed", "sed", "-i", `$a test123`, testFile)
3940
require.NoError(t, err)
4041

41-
runner := NewSupervisor(func(ctx context.Context) *subprocess.Subprocess {
42-
return cmd
42+
runner := NewSupervisor(func(ctx context.Context) (*subprocess.Subprocess, error) {
43+
return cmd, nil
4344
})
4445

4546
require.False(t, filesystem.Exists(testFile))
@@ -55,6 +56,30 @@ func TestSupervisor(t *testing.T) {
5556
assert.Contains(t, string(written), "test\ntest123\ntest123")
5657
})
5758

59+
t.Run("with command error", func(t *testing.T) {
60+
ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
61+
defer cancel()
62+
63+
runner := NewSupervisor(func(ctx context.Context) (*subprocess.Subprocess, error) {
64+
return nil, errors.New("something happened")
65+
})
66+
67+
err := runner.Run(ctx)
68+
errortest.AssertError(t, err, commonerrors.ErrUnexpected)
69+
})
70+
71+
t.Run("with nil command", func(t *testing.T) {
72+
ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
73+
defer cancel()
74+
75+
runner := NewSupervisor(func(ctx context.Context) (*subprocess.Subprocess, error) {
76+
return nil, nil
77+
})
78+
79+
err := runner.Run(ctx)
80+
errortest.AssertError(t, err, commonerrors.ErrUndefined)
81+
})
82+
5883
t.Run("with pre run", func(t *testing.T) {
5984
if platform.IsWindows() {
6085
t.SkipNow()
@@ -72,8 +97,8 @@ func TestSupervisor(t *testing.T) {
7297
cmd, err := subprocess.New(ctx, logger, "starting", "success", "failed", "echo", "123")
7398
require.NoError(t, err)
7499

75-
runner := NewSupervisor(func(ctx context.Context) *subprocess.Subprocess {
76-
return cmd
100+
runner := NewSupervisor(func(ctx context.Context) (*subprocess.Subprocess, error) {
101+
return cmd, nil
77102
}, WithPreStart(func(_ context.Context) error {
78103
_ = counter.Inc()
79104
return nil
@@ -102,8 +127,8 @@ func TestSupervisor(t *testing.T) {
102127
cmd, err := subprocess.New(ctx, logger, "starting", "success", "failed", "echo", "123")
103128
require.NoError(t, err)
104129

105-
runner := NewSupervisor(func(ctx context.Context) *subprocess.Subprocess {
106-
return cmd
130+
runner := NewSupervisor(func(ctx context.Context) (*subprocess.Subprocess, error) {
131+
return cmd, nil
107132
}, WithPostStart(func(_ context.Context) error {
108133
_ = counter.Inc()
109134
return nil
@@ -132,8 +157,8 @@ func TestSupervisor(t *testing.T) {
132157
cmd, err := subprocess.New(ctx, logger, "starting", "success", "failed", "echo", "123")
133158
require.NoError(t, err)
134159

135-
runner := NewSupervisor(func(ctx context.Context) *subprocess.Subprocess {
136-
return cmd
160+
runner := NewSupervisor(func(ctx context.Context) (*subprocess.Subprocess, error) {
161+
return cmd, nil
137162
}, WithPostStop(func(_ context.Context, _ error) error {
138163
_ = counter.Inc()
139164
return nil
@@ -164,8 +189,8 @@ func TestSupervisor(t *testing.T) {
164189
cmd, err := subprocess.New(ctx, logger, "starting", "success", "failed", "echo", "123")
165190
require.NoError(t, err)
166191

167-
runner := NewSupervisor(func(ctx context.Context) *subprocess.Subprocess {
168-
return cmd
192+
runner := NewSupervisor(func(ctx context.Context) (*subprocess.Subprocess, error) {
193+
return cmd, nil
169194
}, WithPreStart(func(_ context.Context) error {
170195
_ = counter1.Inc()
171196
return nil
@@ -182,6 +207,81 @@ func TestSupervisor(t *testing.T) {
182207
assert.Equal(t, counter1.Load(), counter2.Load())
183208
})
184209

210+
t.Run("with pre run (timeout)", func(t *testing.T) {
211+
if platform.IsWindows() {
212+
t.SkipNow()
213+
}
214+
215+
ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
216+
defer cancel()
217+
218+
logger, err := logs.NewLogrLogger(logstest.NewTestLogger(t), "Test")
219+
require.NoError(t, err)
220+
221+
cmd, err := subprocess.New(ctx, logger, "starting", "success", "failed", "echo", "123")
222+
require.NoError(t, err)
223+
224+
runner := NewSupervisor(func(ctx context.Context) (*subprocess.Subprocess, error) {
225+
return cmd, nil
226+
}, WithPreStart(func(_ context.Context) error {
227+
return commonerrors.ErrTimeout
228+
}))
229+
230+
err = runner.Run(ctx)
231+
errortest.AssertError(t, err, commonerrors.ErrTimeout)
232+
assert.NotContains(t, err.Error(), "error running pre-start hook")
233+
})
234+
235+
t.Run("with post run (timeout)", func(t *testing.T) {
236+
if platform.IsWindows() {
237+
t.SkipNow()
238+
}
239+
240+
ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
241+
defer cancel()
242+
243+
logger, err := logs.NewLogrLogger(logstest.NewTestLogger(t), "Test")
244+
require.NoError(t, err)
245+
246+
cmd, err := subprocess.New(ctx, logger, "starting", "success", "failed", "echo", "123")
247+
require.NoError(t, err)
248+
249+
runner := NewSupervisor(func(ctx context.Context) (*subprocess.Subprocess, error) {
250+
return cmd, nil
251+
}, WithPostStart(func(_ context.Context) error {
252+
return commonerrors.ErrTimeout
253+
}))
254+
255+
err = runner.Run(ctx)
256+
errortest.AssertError(t, err, commonerrors.ErrTimeout)
257+
assert.NotContains(t, err.Error(), "error running post-start hook")
258+
})
259+
260+
t.Run("with post stop (timeout)", func(t *testing.T) {
261+
if platform.IsWindows() {
262+
t.SkipNow()
263+
}
264+
265+
ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
266+
defer cancel()
267+
268+
logger, err := logs.NewLogrLogger(logstest.NewTestLogger(t), "Test")
269+
require.NoError(t, err)
270+
271+
cmd, err := subprocess.New(ctx, logger, "starting", "success", "failed", "echo", "123")
272+
require.NoError(t, err)
273+
274+
runner := NewSupervisor(func(ctx context.Context) (*subprocess.Subprocess, error) {
275+
return cmd, nil
276+
}, WithPostStop(func(_ context.Context, _ error) error {
277+
return commonerrors.ErrTimeout
278+
}))
279+
280+
err = runner.Run(ctx)
281+
errortest.AssertError(t, err, commonerrors.ErrTimeout)
282+
assert.NotContains(t, err.Error(), "error running post-stop hook")
283+
})
284+
185285
t.Run("with cancel", func(t *testing.T) {
186286
ctx, cancel := context.WithCancel(context.Background())
187287
defer cancel()
@@ -195,8 +295,8 @@ func TestSupervisor(t *testing.T) {
195295
cmd, err := subprocess.New(ctx, logger, "starting", "success", "failed", "sed", "-i", `$a test123`, testFile)
196296
require.NoError(t, err)
197297

198-
runner := NewSupervisor(func(ctx context.Context) *subprocess.Subprocess {
199-
return cmd
298+
runner := NewSupervisor(func(ctx context.Context) (*subprocess.Subprocess, error) {
299+
return cmd, nil
200300
})
201301

202302
cancel()
@@ -223,8 +323,8 @@ func TestSupervisor(t *testing.T) {
223323
cmd, err := subprocess.New(ctx, logger, "starting", "success", failMessage, "sed", "-i", `$a test123`, testFile)
224324
require.NoError(t, err)
225325

226-
runner := NewSupervisor(func(ctx context.Context) *subprocess.Subprocess {
227-
return cmd
326+
runner := NewSupervisor(func(ctx context.Context) (*subprocess.Subprocess, error) {
327+
return cmd, nil
228328
}, WithHaltingErrors(fmt.Errorf("%v %v", failMessage, commonerrors.ErrCancelled)))
229329

230330
require.False(t, filesystem.Exists(testFile))
@@ -262,8 +362,8 @@ func TestSupervisor(t *testing.T) {
262362
cmd, err := subprocess.New(ctx, logger, "starting", "success", "failed", "sed", "-i", `$a test123`, testFile)
263363
require.NoError(t, err)
264364

265-
runner := NewSupervisor(func(ctx context.Context) *subprocess.Subprocess {
266-
return cmd
365+
runner := NewSupervisor(func(ctx context.Context) (*subprocess.Subprocess, error) {
366+
return cmd, nil
267367
}, WithRestartDelay(time.Hour)) // won't have time to restart
268368

269369
require.False(t, filesystem.Exists(testFile))

0 commit comments

Comments
 (0)