blumeops/argocd/manifests/alloy-k8s/config.alloy
Erich Blume 1897eb1c5b C0: move immich blackbox probe to ringtail alloy
Immich migrated to ringtail's k3s cluster but the probe still targeted
the in-cluster service DNS on indri's minikube, firing ServiceProbeFailure
indefinitely. Moved the target into alloy-ringtail's config so the probe
runs in the cluster where immich actually lives.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-17 08:46:22 -07:00

221 lines
5.6 KiB
Text

// Alloy k8s configuration - collects pod logs from all namespaces
// ============== K8S POD LOG DISCOVERY ==============
// Discover all pods in the cluster
discovery.kubernetes "pods" {
role = "pod"
}
// Relabel to extract useful metadata
discovery.relabel "pods" {
targets = discovery.kubernetes.pods.targets
// Keep only running pods
rule {
source_labels = ["__meta_kubernetes_pod_phase"]
regex = "Pending|Succeeded|Failed|Unknown"
action = "drop"
}
// Set namespace label
rule {
source_labels = ["__meta_kubernetes_namespace"]
target_label = "namespace"
}
// Set pod name label
rule {
source_labels = ["__meta_kubernetes_pod_name"]
target_label = "pod"
}
// Set container name label
rule {
source_labels = ["__meta_kubernetes_pod_container_name"]
target_label = "container"
}
// Set app label from pod labels
rule {
source_labels = ["__meta_kubernetes_pod_label_app"]
target_label = "app"
}
// Fallback: use app.kubernetes.io/name if no app label
rule {
source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"]
target_label = "app"
regex = "(.+)"
action = "replace"
}
// Set node name
rule {
source_labels = ["__meta_kubernetes_pod_node_name"]
target_label = "node"
}
// Build the log path for the pod container
rule {
source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"]
target_label = "__path__"
separator = "/"
replacement = "/var/log/pods/*$1/$2/*.log"
}
}
// Tail pod logs
loki.source.kubernetes "pods" {
targets = discovery.relabel.pods.output
forward_to = [loki.process.pods.receiver]
}
// Process logs - parse JSON if present, add labels
loki.process "pods" {
forward_to = [loki.write.loki.receiver]
// Drop noisy deprecation warning from minikube storage-provisioner
// See: https://github.com/kubernetes/minikube/issues/21009
stage.drop {
source = ""
expression = "v1 Endpoints is deprecated"
}
// Try to parse JSON logs (e.g., structured app logs)
// Handle both "msg" (common) and "message" (zot) field names
stage.json {
expressions = {
level = "level",
msg = "msg",
message = "message",
time = "time",
caller = "caller",
repository = "repository",
}
}
// Drop JSON parsing error labels (non-JSON logs are fine, just won't have extracted fields)
stage.label_drop {
values = ["__error__", "__error_details__"]
}
// Normalize 1password-connect numeric log levels to strings (1=error..5=trace)
// Scoped to the 1password namespace so other services are unaffected.
// See: https://github.com/1Password/connect/issues/44
stage.match {
selector = "{namespace=\"1password\"}"
stage.template {
source = "level"
template = "{{ if eq .Value \"1\" }}error{{ else if eq .Value \"2\" }}warn{{ else if eq .Value \"3\" }}info{{ else if eq .Value \"4\" }}debug{{ else if eq .Value \"5\" }}trace{{ else }}{{ .Value }}{{ end }}"
}
}
// Extract labels from parsed JSON data
stage.labels {
values = {
level = "",
caller = "",
repository = "",
}
}
// Add cluster label for multi-cluster identification
stage.static_labels {
values = { cluster = "indri" }
}
}
// Write logs to Loki
loki.write "loki" {
endpoint {
url = "http://loki.monitoring.svc.cluster.local:3100/loki/api/v1/push"
}
}
// ============== SERVICE HEALTH PROBES ==============
// Blackbox-style HTTP probes for k8s services
prometheus.exporter.blackbox "services" {
config = "{ modules: { http_2xx: { prober: http, timeout: 5s } } }"
target {
name = "miniflux"
address = "http://miniflux.miniflux.svc.cluster.local:8080/healthcheck"
module = "http_2xx"
}
target {
name = "kiwix"
address = "http://kiwix.kiwix.svc.cluster.local:80/"
module = "http_2xx"
}
target {
name = "transmission"
address = "http://transmission.torrent.svc.cluster.local:9091/transmission/web/"
module = "http_2xx"
}
target {
// devpi runs natively on indri (LaunchAgent), not in-cluster.
// We probe through Caddy (https://pypi.ops.eblu.me) which the cluster can reach via Tailscale.
name = "devpi"
address = "https://pypi.ops.eblu.me/+api"
module = "http_2xx"
}
target {
name = "argocd"
address = "http://argocd-server.argocd.svc.cluster.local:80/healthz"
module = "http_2xx"
}
target {
name = "prometheus"
address = "http://prometheus.monitoring.svc.cluster.local:9090/-/healthy"
module = "http_2xx"
}
target {
name = "loki"
address = "http://loki.monitoring.svc.cluster.local:3100/ready"
module = "http_2xx"
}
target {
name = "grafana"
address = "http://grafana.monitoring.svc.cluster.local:80/api/health"
module = "http_2xx"
}
target {
name = "teslamate"
address = "http://teslamate.teslamate.svc.cluster.local:4000/"
module = "http_2xx"
}
target {
name = "navidrome"
address = "http://navidrome.navidrome.svc.cluster.local:4533/"
module = "http_2xx"
}
}
// Scrape blackbox probe results
prometheus.scrape "blackbox" {
targets = prometheus.exporter.blackbox.services.targets
scrape_interval = "30s"
forward_to = [prometheus.remote_write.prometheus.receiver]
}
// Push metrics to Prometheus
prometheus.remote_write "prometheus" {
external_labels = { cluster = "indri" }
endpoint {
url = "http://prometheus.monitoring.svc.cluster.local:9090/api/v1/write"
}
}