diff --git a/CloudController.cs b/CloudController.cs
index a63d362..4c399a0 100644
--- a/CloudController.cs
+++ b/CloudController.cs
@@ -3,6 +3,7 @@ using HetznerCloudApi;
 using HetznerCloudApi.Object.ServerType;
 using HetznerCloudApi.Object.Universal;
+using Prometheus;
 using RandomFriendlyNameGenerator;
 
 namespace GithubActionsOrchestrator;
 
@@ -17,6 +18,8 @@ public class CloudController(
     private readonly HetznerCloudClient _client = new(hetznerCloudToken);
     private readonly ILogger _logger = logger;
     private List _activeRunners = new();
+    private static readonly Gauge ActiveMachinesCount = Metrics
+        .CreateGauge("github_machines_active", "Number of active machines", labelNames: ["org","size"]);
 
     public async Task CreateNewRunner(string arch, string size, string runnerToken, string orgName)
     {
@@ -81,6 +84,32 @@ private void StoreActiveRunners()
     {
         byte[] json = JsonSerializer.SerializeToUtf8Bytes(_activeRunners);
         File.WriteAllBytes(_persistentPath, json);
+
+        var grouped = _activeRunners
+            .GroupBy(m => new { m.OrgName, m.Size })
+            .Select(g => new
+            {
+                g.Key.OrgName,
+                g.Key.Size,
+                Count = g.Count()
+            })
+            .ToList();
+
+        // An (org, size) series that no longer has any machine would otherwise keep
+        // exporting its last non-zero value, so reset every series without a live group.
+        foreach (string[] labels in ActiveMachinesCount.GetAllLabelValues().ToList())
+        {
+            if (!grouped.Any(g => g.OrgName == labels[0] && g.Size == labels[1]))
+            {
+                ActiveMachinesCount.Labels(labels).Set(0);
+            }
+        }
+
+        foreach (var group in grouped)
+        {
+            ActiveMachinesCount.Labels(group.OrgName, group.Size).Set(group.Count);
+        }
+
     }
 
     public async Task LoadActiveRunners()
diff --git a/GithubActionsOrchestrator.csproj b/GithubActionsOrchestrator.csproj
index 4954a22..f3a2068 100644
--- a/GithubActionsOrchestrator.csproj
+++ b/GithubActionsOrchestrator.csproj
@@ -11,6 +11,8 @@
+
+
diff --git a/Program.cs b/Program.cs
index 6613d7e..eb04733 100644
--- a/Program.cs
+++ b/Program.cs
@@ -1,18 +1,36 @@
 using System.Net.Http.Headers;
 using System.Text.Json;
 using Microsoft.AspNetCore.Mvc;
+using Prometheus;
 
 namespace GithubActionsOrchestrator;
 
 public class Program
 {
     public static AutoScalerConfiguration Config = new();
+
+    private static readonly Counter ProcessedJobCount = Metrics
+        .CreateCounter("github_autoscaler_jobs_processed", "Number of processed jobs", labelNames: ["org","size"]);
+
+    private static readonly Counter QueuedJobCount = Metrics
+        .CreateCounter("github_autoscaler_jobs_queued", "Number of queued jobs", labelNames: ["org","size"]);
+
+    private static readonly Counter PickedJobCount = Metrics
+        .CreateCounter("github_autoscaler_jobs_picked", "Number of jobs picked up by a runner", labelNames: ["org","size"]);
+
+    private static readonly Counter MachineCreatedCount = Metrics
+        .CreateCounter("github_machines_created", "Number of created machines", labelNames: ["org","size"]);
+
+    private static readonly Counter TotalMachineTime = Metrics
+        .CreateCounter("github_total_machine_time", "Number of seconds machines were alive", labelNames: ["org","size"]);
+
     public static void Main(string[] args)
     {
-        string persistPath = Environment.GetEnvironmentVariable("PERSITENT_PATH") ?? Directory.CreateTempSubdirectory().FullName;
+        string persistPath = Environment.GetEnvironmentVariable("PERSIST_DIR") ?? Directory.CreateTempSubdirectory().FullName;
+        string configDir = Environment.GetEnvironmentVariable("CONFIG_DIR") ?? Directory.CreateTempSubdirectory().FullName;
 
         // Setup pool config
-        string configPath = Path.Combine(persistPath, "config.json");
+        string configPath = Path.Combine(configDir, "config.json");
         if (!File.Exists(configPath))
         {
             Console.WriteLine($"[ERR] Unable to read config file at {configPath}");
@@ -29,6 +47,11 @@ public static void Main(string[] args)
 
         Console.WriteLine($"[INIT] Loaded {Config.OrgConfigs.Count} orgs and {Config.Sizes.Count} sizes.");
 
+        // Prepare metrics
+        using var server = new Prometheus.KestrelMetricServer(port: 9000);
+        server.Start();
+        Console.WriteLine("[INIT] Metrics server listening on port 9000");
+
         var builder = WebApplication.CreateBuilder(args);
 
         builder.Services.AddHostedService();
@@ -127,6 +150,8 @@ public static void Main(string[] args)
 
                         string newRunner = await cloud.CreateNewRunner("x64", size, runnerToken, orgName);
                         logger.LogInformation($"New Runner {newRunner} [{size}] entering pool.");
+                        MachineCreatedCount.Labels(orgName, size).Inc();
+                        QueuedJobCount.Labels(orgName, size).Inc();
 
                         break;
                     case "in_progress":
@@ -134,6 +159,10 @@ public static void Main(string[] args)
                         string? jobUrl = workflowJson.GetProperty("url").GetString();
                         logger.LogInformation($"Workflow Job {jobId} now in progress on {runnerName}");
                         cloud.AddJobClaimToRunner(runnerName, jobId, jobUrl, repoName);
+
+                        // GetInfoForJob(...)?.Size is null when no info comes back; never pass a null label value.
+                        string jobSize = cloud.GetInfoForJob(jobId)?.Size ?? "unknown";
+                        PickedJobCount.Labels(orgName, jobSize).Inc();
                         break;
                     case "completed":
                         logger.LogInformation($"Workflow Job {jobId} has completed. Deleting VM associated with Job...");
@@ -145,6 +174,10 @@ public static void Main(string[] args)
                         else
                         {
                             await cloud.DeleteRunner(vm.Id);
+                            ProcessedJobCount.Labels(vm.OrgName, vm.Size).Inc();
+
+                            double secondsAlive = (DateTime.UtcNow - vm.CreatedAt).TotalSeconds;
+                            TotalMachineTime.Labels(vm.OrgName, vm.Size).Inc(secondsAlive);
                         }
                         break;
                     default:
diff --git a/Properties/launchSettings.json b/Properties/launchSettings.json
index 7ed92ce..af6718b 100644
--- a/Properties/launchSettings.json
+++ b/Properties/launchSettings.json
@@ -16,7 +16,8 @@
       "launchUrl": "swagger",
       "applicationUrl": "http://localhost:5178",
       "environmentVariables": {
-        "ASPNETCORE_ENVIRONMENT": "Development"
+        "ASPNETCORE_ENVIRONMENT": "Development",
+        "CONFIG_DIR": "/Users/markuskeil/dev/tmp"
       }
     },
     "https": {