Skip to content

Commit 30077c6

Browse files
committed
expose supervisor metrics and make more configurable
1 parent 47f3c33 commit 30077c6

File tree

3 files changed

+34
-11
lines changed

3 files changed

+34
-11
lines changed

apps/supervisor/src/env.ts

+3
Original file line number | Diff line number | Diff line change
@@ -54,7 +54,10 @@ const Env = z.object({
5454
EPHEMERAL_STORAGE_SIZE_REQUEST: z.string().default("2Gi"),
5555

5656
// Metrics
57+
METRICS_ENABLED: BoolEnv.default(true),
5758
METRICS_COLLECT_DEFAULTS: BoolEnv.default(true),
59+
METRICS_HOST: z.string().default("127.0.0.1"),
60+
METRICS_PORT: z.coerce.number().int().default(9090),
5861

5962
// Pod cleaner
6063
POD_CLEANER_ENABLED: BoolEnv.default(true),

apps/supervisor/src/index.ts

+17-10
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ if (env.METRICS_COLLECT_DEFAULTS) {
3030

3131
class ManagedSupervisor {
3232
private readonly workerSession: SupervisorSession;
33-
private readonly httpServer: HttpServer;
33+
private readonly metricsServer?: HttpServer;
3434
private readonly workloadServer: WorkloadServer;
3535
private readonly workloadManager: WorkloadManager;
3636
private readonly logger = new SimpleStructuredLogger("managed-worker");
@@ -50,6 +50,7 @@ class ManagedSupervisor {
5050

5151
if (env.POD_CLEANER_ENABLED) {
5252
this.podCleaner = new PodCleaner({
53+
register,
5354
namespace: env.KUBERNETES_NAMESPACE,
5455
batchSize: env.POD_CLEANER_BATCH_SIZE,
5556
intervalMs: env.POD_CLEANER_INTERVAL_MS,
@@ -58,6 +59,7 @@ class ManagedSupervisor {
5859

5960
if (env.FAILED_POD_HANDLER_ENABLED) {
6061
this.failedPodHandler = new FailedPodHandler({
62+
register,
6163
namespace: env.KUBERNETES_NAMESPACE,
6264
reconnectIntervalMs: env.FAILED_POD_HANDLER_RECONNECT_INTERVAL_MS,
6365
});
@@ -224,12 +226,16 @@ class ManagedSupervisor {
224226
}
225227
});
226228

227-
// Used for health checks and metrics
228-
this.httpServer = new HttpServer({ port: 8080, host: "0.0.0.0" }).route("/health", "GET", {
229-
handler: async ({ reply }) => {
230-
reply.text("OK");
231-
},
232-
});
229+
if (env.METRICS_ENABLED) {
230+
this.metricsServer = new HttpServer({
231+
port: env.METRICS_PORT,
232+
host: env.METRICS_HOST,
233+
metrics: {
234+
register,
235+
expose: true,
236+
},
237+
});
238+
}
233239

234240
// Responds to workload requests only
235241
this.workloadServer = new WorkloadServer({
@@ -320,13 +326,12 @@ class ManagedSupervisor {
320326
}
321327

322328
await this.workerSession.start();
323-
324-
await this.httpServer.start();
329+
await this.metricsServer?.start();
325330
}
326331

327332
async stop() {
328333
this.logger.log("[ManagedWorker] Shutting down");
329-
await this.httpServer.stop();
334+
await this.workerSession.stop();
330335

331336
if (this.podCleaner) {
332337
await this.podCleaner.stop();
@@ -335,6 +340,8 @@ class ManagedSupervisor {
335340
if (this.failedPodHandler) {
336341
await this.failedPodHandler.stop();
337342
}
343+
344+
await this.metricsServer?.stop();
338345
}
339346
}
340347

apps/supervisor/src/workloadServer/index.ts

+14-1
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ import {
2222
} from "@trigger.dev/core/v3/workers";
2323
import { HttpServer, type CheckpointClient } from "@trigger.dev/core/v3/serverOnly";
2424
import { type IncomingMessage } from "node:http";
25+
import { register } from "../metrics.js";
2526

2627
// Use the official export when upgrading to socket.io@4.8.0
2728
interface DefaultEventsMap {
@@ -121,7 +122,19 @@ export class WorkloadServer extends EventEmitter<WorkloadServerEvents> {
121122
}
122123

123124
private createHttpServer({ host, port }: { host: string; port: number }) {
124-
return new HttpServer({ port, host })
125+
return new HttpServer({
126+
port,
127+
host,
128+
metrics: {
129+
register,
130+
expose: false,
131+
},
132+
})
133+
.route("/health", "GET", {
134+
handler: async ({ reply }) => {
135+
reply.text("OK");
136+
},
137+
})
125138
.route(
126139
"/api/v1/workload-actions/runs/:runFriendlyId/snapshots/:snapshotFriendlyId/attempts/start",
127140
"POST",

0 commit comments

Comments
 (0)