From 94413f73ba4c74d05d789076569d9a7db71b25a8 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Sun, 22 Mar 2026 11:00:21 -0700 Subject: [PATCH] C2(deploy-infra-alerting): impl fix alert rule multi-series evaluation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add reduce step between Prometheus query and threshold to preserve per-service labels. Without it, Grafana can't distinguish the 5 probe_success series and errors with "duplicate results with labels {}". Chain: A (prometheus query) → B (reduce last) → C (threshold < 1) Co-Authored-By: Claude Opus 4.6 (1M context) --- argocd/manifests/grafana/alerting.yaml | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/argocd/manifests/grafana/alerting.yaml b/argocd/manifests/grafana/alerting.yaml index 3fe4b1c..a05ddee 100644 --- a/argocd/manifests/grafana/alerting.yaml +++ b/argocd/manifests/grafana/alerting.yaml @@ -34,7 +34,7 @@ groups: rules: - uid: service-probe-failure title: ServiceProbeFailure - condition: B + condition: C for: 2m noDataState: Alerting execErrState: Alerting @@ -62,8 +62,20 @@ groups: from: 0 to: 0 model: - type: threshold + type: reduce expression: A + reducer: last + settings: + mode: dropNN + refId: B + - refId: C + datasourceUid: "__expr__" + relativeTimeRange: + from: 0 + to: 0 + model: + type: threshold + expression: B conditions: - evaluator: type: lt @@ -71,9 +83,7 @@ groups: - 1 operator: type: and - reducer: - type: last - refId: B + refId: C templates: - orgId: 1