# Grafana alerting provisioning file: defines the contact point,
# notification policy, alert rule group and notification template that
# deliver infrastructure alerts through the `ntfy-infra` webhook receiver.
apiVersion: 1

# Contact point `ntfy-infra` (org 1): a single webhook receiver that POSTs
# the body rendered by the `ntfy-infra.payload` notification template.
contactPoints:
  - orgId: 1
    name: ntfy-infra
    receivers:
      - uid: ntfy-infra-webhook
        type: webhook
        settings:
          # NOTE(review): host `webhook-debug` and path `/test` look like a
          # debugging endpoint - confirm this is the intended target.
          url: http://webhook-debug.monitoring.svc.cluster.local:8080/test
          httpMethod: POST
          # Quoted so the value stays a string.
          # Presumably "0" means no cap on alerts per request - TODO confirm.
          maxAlerts: "0"
          # `>-` folds the body to one line and strips the trailing newline.
          # NOTE(review): verify that the deployed Grafana version reads the
          # custom payload from `payloadTemplate`; the key name may differ
          # between versions.
          payloadTemplate: >-
            {{ template "ntfy-infra.payload" . }}
        disableResolveMessage: false

# Notification policy for org 1: everything is routed to the `ntfy-infra`
# contact point, grouped by the `alertname` and `service` labels.
policies:
  - orgId: 1
    receiver: ntfy-infra
    group_by:
      - alertname
      - service
    group_wait: 10s
    group_interval: 30s
    # NOTE(review): a 1m repeat interval re-notifies very frequently -
    # confirm this is intended and not a leftover from testing.
    repeat_interval: 1m

# Rule group `service-health` (org 1, folder "Infrastructure Alerts"),
# with a 30s group interval.
#
# ServiceProbeFailure is a three-step pipeline:
#   A - queries the `prometheus` datasource for `probe_success`, copying the
#       part of the `job` label after "integrations/blackbox/" into a new
#       `service` label;
#   B - reduces A with the `last` reducer (mode `dropNN`);
#   C - threshold on B: true when the value is less than 1.
# `condition: C` makes C the alert condition; `for: 2m` is the pending period.
groups:
  - orgId: 1
    name: service-health
    folder: Infrastructure Alerts
    interval: 30s
    rules:
      - uid: service-probe-failure
        title: ServiceProbeFailure
        condition: C
        for: 2m
        # Both missing data and evaluation errors are treated as Alerting.
        noDataState: Alerting
        execErrState: Alerting
        annotations:
          # Read by the `ntfy-infra.payload` template to build the message.
          summary: >-
            {{ index $labels "service" }} health check is failing
          # Read by the same template to add an "Open Runbook" action.
          runbook_url: https://docs.eblu.me/how-to/alerts/runbook-service-probe-failure
        labels:
          severity: warning
        data:
          - refId: A
            datasourceUid: prometheus
            # Presumably seconds relative to evaluation time (last 5 minutes)
            # - TODO confirm.
            relativeTimeRange:
              from: 300
              to: 0
            model:
              # Folded scalar: the two lines below are joined with a single
              # space into one PromQL expression. "$1" is the regex capture.
              expr: >-
                label_replace(probe_success, "service",
                "$1", "job", "integrations/blackbox/(.*)")
              interval: ""
              refId: A
          - refId: B
            datasourceUid: "__expr__"
            relativeTimeRange:
              from: 0
              to: 0
            model:
              type: reduce
              expression: A
              reducer: last
              settings:
                mode: dropNN
              refId: B
          - refId: C
            datasourceUid: "__expr__"
            relativeTimeRange:
              from: 0
              to: 0
            model:
              type: threshold
              expression: B
              conditions:
                - evaluator:
                    type: lt
                    params:
                      - 1
                  operator:
                    type: and
              refId: C

# Notification template group `ntfy-infra` (org 1).
#
# Defines the named template `ntfy-infra.payload`, which renders one JSON
# object (via `data.ToJSON`) with these keys:
#   topic    - fixed string "infra-alerts"
#   title    - "[<STATUS upper-cased>] <common alertname label>"
#   message  - each alert's `summary` annotation followed by a newline
#   priority - fixed value 3
#   actions  - one "view" action labelled "Open Runbook" per alert that has
#              a `runbook_url` annotation
#
# Comments must stay outside the block scalar: everything indented under
# `template: |` is template text, not YAML.
templates:
  - orgId: 1
    name: ntfy-infra
    template: |
      {{ define "ntfy-infra.payload" -}}
      {{- $msg := "" -}}
      {{- range .Alerts -}}
      {{- $msg = (printf "%s%s\n" $msg .Annotations.summary) -}}
      {{- end -}}
      {{- $title := (printf "[%s] %s" (.Status | toUpper) .CommonLabels.alertname) -}}
      {{- $actions := coll.Slice -}}
      {{- range .Alerts -}}
      {{- if .Annotations.runbook_url -}}
      {{- $actions = coll.Append (coll.Dict "action" "view" "label" "Open Runbook" "url" .Annotations.runbook_url) $actions -}}
      {{- end -}}
      {{- end -}}
      {{- coll.Dict "topic" "infra-alerts" "title" $title "message" $msg "priority" 3 "actions" $actions | data.ToJSON -}}
      {{- end }}