Skip to content

Commit 3bbcaf0

Browse files
Remove noisy SLO alerts rules/raw (#600)
* remove noisy alerts rules/raw
* remove only GET endpoint
1 parent 454c825 commit 3bbcaf0

6 files changed

+0 -1047 lines changed

configuration/observatorium/slo.go

-12
Original file line number | Diff line number | Diff line change
@@ -347,18 +347,6 @@ func ObservatoriumSLOs(envName rhobsInstanceEnv, signal signal) []pyrrav1alpha1.
347347
alertName: "APIRulesRawWriteAvailabilityErrorBudgetBurning",
348348
sloType: sloTypeAvailability,
349349
},
350-
{
351-
name: "api-rules-raw-read-availability-slo",
352-
labels: map[string]string{
353-
slo.PropagationLabelsPrefix + "service": "observatorium-api",
354-
"instance": string(envName),
355-
},
356-
description: "API /rules/raw endpoint for reads is burning too much error budget to guarantee availability SLOs.",
357-
successOrErrorsExpr: "http_requests_total{job=\"" + apiJobSelector[envName] + "\", handler=\"rules-raw\", method=\"GET\", group=\"metricsv1\", code=~\"^5..$\"}",
358-
totalExpr: "http_requests_total{job=\"" + apiJobSelector[envName] + "\", handler=\"rules-raw\", method=\"GET\", group=\"metricsv1\"}",
359-
alertName: "APIRulesRawReadAvailabilityErrorBudgetBurning",
360-
sloType: sloTypeAvailability,
361-
},
362350
{
363351
name: "api-rules-read-availability-slo",
364352
labels: map[string]string{

resources/observability/prometheusrules/rhobs-slos-mst-production.prometheusrules.yaml

-207
Original file line number | Diff line number | Diff line change
@@ -980,213 +980,6 @@ spec:
980980
labels:
981981
slo: api-rules-raw-write-availability-slo
982982
record: pyrra_errors_total
983-
- interval: 2m30s
984-
name: api-rules-raw-read-availability-slo-increase
985-
rules:
986-
- expr: sum by(code) (increase(http_requests_total{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}[4w]))
987-
labels:
988-
group: metricsv1
989-
handler: rules-raw
990-
job: observatorium-observatorium-mst-api
991-
method: GET
992-
service: observatorium-api
993-
slo: api-rules-raw-read-availability-slo
994-
record: http_requests:increase4w
995-
- alert: SLOMetricAbsent
996-
annotations:
997-
dashboard: https://grafana.app-sre.devshift.net/d/283e7002d85c08126681241df2fdb22b/mst-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
998-
message: API /rules/raw endpoint for reads is burning too much error budget
999-
to guarantee availability SLOs.
1000-
runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIRulesRawReadAvailabilityErrorBudgetBurning
1001-
expr: absent(http_requests_total{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"})
1002-
== 1
1003-
for: 2m
1004-
labels:
1005-
group: metricsv1
1006-
handler: rules-raw
1007-
job: observatorium-observatorium-mst-api
1008-
method: GET
1009-
service: observatorium-api
1010-
severity: medium
1011-
slo: api-rules-raw-read-availability-slo
1012-
- interval: 30s
1013-
name: api-rules-raw-read-availability-slo
1014-
rules:
1015-
- expr: sum(rate(http_requests_total{code=~"^5..$",group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}[5m]))
1016-
/ sum(rate(http_requests_total{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}[5m]))
1017-
labels:
1018-
group: metricsv1
1019-
handler: rules-raw
1020-
job: observatorium-observatorium-mst-api
1021-
method: GET
1022-
service: observatorium-api
1023-
slo: api-rules-raw-read-availability-slo
1024-
record: http_requests:burnrate5m
1025-
- expr: sum(rate(http_requests_total{code=~"^5..$",group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}[30m]))
1026-
/ sum(rate(http_requests_total{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}[30m]))
1027-
labels:
1028-
group: metricsv1
1029-
handler: rules-raw
1030-
job: observatorium-observatorium-mst-api
1031-
method: GET
1032-
service: observatorium-api
1033-
slo: api-rules-raw-read-availability-slo
1034-
record: http_requests:burnrate30m
1035-
- expr: sum(rate(http_requests_total{code=~"^5..$",group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}[1h]))
1036-
/ sum(rate(http_requests_total{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}[1h]))
1037-
labels:
1038-
group: metricsv1
1039-
handler: rules-raw
1040-
job: observatorium-observatorium-mst-api
1041-
method: GET
1042-
service: observatorium-api
1043-
slo: api-rules-raw-read-availability-slo
1044-
record: http_requests:burnrate1h
1045-
- expr: sum(rate(http_requests_total{code=~"^5..$",group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}[2h]))
1046-
/ sum(rate(http_requests_total{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}[2h]))
1047-
labels:
1048-
group: metricsv1
1049-
handler: rules-raw
1050-
job: observatorium-observatorium-mst-api
1051-
method: GET
1052-
service: observatorium-api
1053-
slo: api-rules-raw-read-availability-slo
1054-
record: http_requests:burnrate2h
1055-
- expr: sum(rate(http_requests_total{code=~"^5..$",group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}[6h]))
1056-
/ sum(rate(http_requests_total{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}[6h]))
1057-
labels:
1058-
group: metricsv1
1059-
handler: rules-raw
1060-
job: observatorium-observatorium-mst-api
1061-
method: GET
1062-
service: observatorium-api
1063-
slo: api-rules-raw-read-availability-slo
1064-
record: http_requests:burnrate6h
1065-
- expr: sum(rate(http_requests_total{code=~"^5..$",group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}[1d]))
1066-
/ sum(rate(http_requests_total{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}[1d]))
1067-
labels:
1068-
group: metricsv1
1069-
handler: rules-raw
1070-
job: observatorium-observatorium-mst-api
1071-
method: GET
1072-
service: observatorium-api
1073-
slo: api-rules-raw-read-availability-slo
1074-
record: http_requests:burnrate1d
1075-
- expr: sum(rate(http_requests_total{code=~"^5..$",group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}[4d]))
1076-
/ sum(rate(http_requests_total{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}[4d]))
1077-
labels:
1078-
group: metricsv1
1079-
handler: rules-raw
1080-
job: observatorium-observatorium-mst-api
1081-
method: GET
1082-
service: observatorium-api
1083-
slo: api-rules-raw-read-availability-slo
1084-
record: http_requests:burnrate4d
1085-
- alert: APIRulesRawReadAvailabilityErrorBudgetBurning
1086-
annotations:
1087-
dashboard: https://grafana.app-sre.devshift.net/d/283e7002d85c08126681241df2fdb22b/mst-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
1088-
message: API /rules/raw endpoint for reads is burning too much error budget
1089-
to guarantee availability SLOs.
1090-
runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIRulesRawReadAvailabilityErrorBudgetBurning
1091-
expr: http_requests:burnrate5m{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET",slo="api-rules-raw-read-availability-slo"}
1092-
> (14 * (1-0.99)) and http_requests:burnrate1h{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET",slo="api-rules-raw-read-availability-slo"}
1093-
> (14 * (1-0.99))
1094-
for: 2m
1095-
labels:
1096-
group: metricsv1
1097-
handler: rules-raw
1098-
job: observatorium-observatorium-mst-api
1099-
long_burnrate_window: 1h
1100-
method: GET
1101-
service: observatorium-api
1102-
severity: high
1103-
short_burnrate_window: 5m
1104-
slo: api-rules-raw-read-availability-slo
1105-
- alert: APIRulesRawReadAvailabilityErrorBudgetBurning
1106-
annotations:
1107-
dashboard: https://grafana.app-sre.devshift.net/d/283e7002d85c08126681241df2fdb22b/mst-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
1108-
message: API /rules/raw endpoint for reads is burning too much error budget
1109-
to guarantee availability SLOs.
1110-
runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIRulesRawReadAvailabilityErrorBudgetBurning
1111-
expr: http_requests:burnrate30m{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET",slo="api-rules-raw-read-availability-slo"}
1112-
> (7 * (1-0.99)) and http_requests:burnrate6h{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET",slo="api-rules-raw-read-availability-slo"}
1113-
> (7 * (1-0.99))
1114-
for: 15m
1115-
labels:
1116-
group: metricsv1
1117-
handler: rules-raw
1118-
job: observatorium-observatorium-mst-api
1119-
long_burnrate_window: 6h
1120-
method: GET
1121-
service: observatorium-api
1122-
severity: high
1123-
short_burnrate_window: 30m
1124-
slo: api-rules-raw-read-availability-slo
1125-
- alert: APIRulesRawReadAvailabilityErrorBudgetBurning
1126-
annotations:
1127-
dashboard: https://grafana.app-sre.devshift.net/d/283e7002d85c08126681241df2fdb22b/mst-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
1128-
message: API /rules/raw endpoint for reads is burning too much error budget
1129-
to guarantee availability SLOs.
1130-
runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIRulesRawReadAvailabilityErrorBudgetBurning
1131-
expr: http_requests:burnrate2h{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET",slo="api-rules-raw-read-availability-slo"}
1132-
> (2 * (1-0.99)) and http_requests:burnrate1d{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET",slo="api-rules-raw-read-availability-slo"}
1133-
> (2 * (1-0.99))
1134-
for: 1h
1135-
labels:
1136-
group: metricsv1
1137-
handler: rules-raw
1138-
job: observatorium-observatorium-mst-api
1139-
long_burnrate_window: 1d
1140-
method: GET
1141-
service: observatorium-api
1142-
severity: medium
1143-
short_burnrate_window: 2h
1144-
slo: api-rules-raw-read-availability-slo
1145-
- alert: APIRulesRawReadAvailabilityErrorBudgetBurning
1146-
annotations:
1147-
dashboard: https://grafana.app-sre.devshift.net/d/283e7002d85c08126681241df2fdb22b/mst-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
1148-
message: API /rules/raw endpoint for reads is burning too much error budget
1149-
to guarantee availability SLOs.
1150-
runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIRulesRawReadAvailabilityErrorBudgetBurning
1151-
expr: http_requests:burnrate6h{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET",slo="api-rules-raw-read-availability-slo"}
1152-
> (1 * (1-0.99)) and http_requests:burnrate4d{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET",slo="api-rules-raw-read-availability-slo"}
1153-
> (1 * (1-0.99))
1154-
for: 3h
1155-
labels:
1156-
group: metricsv1
1157-
handler: rules-raw
1158-
job: observatorium-observatorium-mst-api
1159-
long_burnrate_window: 4d
1160-
method: GET
1161-
service: observatorium-api
1162-
severity: medium
1163-
short_burnrate_window: 6h
1164-
slo: api-rules-raw-read-availability-slo
1165-
- interval: 30s
1166-
name: api-rules-raw-read-availability-slo-generic
1167-
rules:
1168-
- expr: "0.99"
1169-
labels:
1170-
slo: api-rules-raw-read-availability-slo
1171-
record: pyrra_objective
1172-
- expr: 2419200
1173-
labels:
1174-
slo: api-rules-raw-read-availability-slo
1175-
record: pyrra_window
1176-
- expr: 1 - sum(http_requests:increase4w{code=~"^5..$",group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}
1177-
or vector(0)) / sum(http_requests:increase4w{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"})
1178-
labels:
1179-
slo: api-rules-raw-read-availability-slo
1180-
record: pyrra_availability
1181-
- expr: sum(http_requests_total{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"})
1182-
labels:
1183-
slo: api-rules-raw-read-availability-slo
1184-
record: pyrra_requests_total
1185-
- expr: sum(http_requests_total{code=~"^5..$",group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}
1186-
or vector(0))
1187-
labels:
1188-
slo: api-rules-raw-read-availability-slo
1189-
record: pyrra_errors_total
1190983
- interval: 2m30s
1191984
name: api-rules-read-availability-slo-increase
1192985
rules:

0 commit comments

Comments
 (0)