// Alloy k8s configuration - collects pod logs from all namespaces and
// probes in-cluster service health endpoints.

// ============== K8S POD LOG DISCOVERY ==============

// Discover all pods in the cluster
discovery.kubernetes "pods" {
  role = "pod"
}

// Relabel to extract useful metadata
discovery.relabel "pods" {
  targets = discovery.kubernetes.pods.targets

  // Keep only running pods (every other phase is dropped)
  rule {
    source_labels = ["__meta_kubernetes_pod_phase"]
    regex         = "Pending|Succeeded|Failed|Unknown"
    action        = "drop"
  }

  // Set namespace label
  rule {
    source_labels = ["__meta_kubernetes_namespace"]
    target_label  = "namespace"
  }

  // Set pod name label
  rule {
    source_labels = ["__meta_kubernetes_pod_name"]
    target_label  = "pod"
  }

  // Set container name label
  rule {
    source_labels = ["__meta_kubernetes_pod_container_name"]
    target_label  = "container"
  }

  // Set app label from the pod's `app` label
  rule {
    source_labels = ["__meta_kubernetes_pod_label_app"]
    target_label  = "app"
  }

  // Fallback: use app.kubernetes.io/name only if the pod has no `app` label.
  // Both labels are joined as "<app>;<name>"; relabel regexes are fully
  // anchored, so ";(.+)" matches only when <app> is empty and <name> is not.
  // This keeps an explicit `app` label from being overwritten.
  rule {
    source_labels = ["__meta_kubernetes_pod_label_app", "__meta_kubernetes_pod_label_app_kubernetes_io_name"]
    separator     = ";"
    regex         = ";(.+)"
    target_label  = "app"
    replacement   = "$1"
    action        = "replace"
  }

  // Set node name
  rule {
    source_labels = ["__meta_kubernetes_pod_node_name"]
    target_label  = "node"
  }

  // Build the log path for the pod container.
  // The source labels are joined with "/" and the default regex "(.*)"
  // captures the whole "<pod_uid>/<container>" string as $1. There is no
  // second capture group, so the replacement must reference $1 only.
  // NOTE(review): loki.source.kubernetes reads logs through the Kubernetes
  // API rather than from disk, so this label is presumably only relevant to
  // a file-based consumer of these targets - confirm before removing.
  rule {
    source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"]
    target_label  = "__path__"
    separator     = "/"
    replacement   = "/var/log/pods/*$1/*.log"
  }
}

// Tail pod logs
loki.source.kubernetes "pods" {
  targets    = discovery.relabel.pods.output
  forward_to = [loki.process.pods.receiver]
}

// Process logs - parse JSON if present, add labels
loki.process "pods" {
  forward_to = [loki.write.loki.receiver]

  // Drop noisy deprecation warning from minikube storage-provisioner
  // See: https://github.com/kubernetes/minikube/issues/21009
  stage.drop {
    source     = ""
    expression = "v1 Endpoints is deprecated"
  }

  // Try to parse JSON logs (e.g., structured app logs)
  // Extract both "msg" (common) and "message" (zot) field names
  stage.json {
    expressions = {
      level      = "level",
      msg        = "msg",
      message    = "message",
      time       = "time",
      caller     = "caller",
      repository = "repository",
    }
  }

  // Drop JSON parsing error labels (non-JSON logs are fine, they just won't
  // have extracted fields)
  stage.label_drop {
    values = ["__error__", "__error_details__"]
  }

  // Normalize 1password-connect numeric log levels to strings (1=error..5=trace)
  // Scoped to the 1password namespace so other services are unaffected.
  // Any value outside 1..5 is passed through unchanged.
  // See: https://github.com/1Password/connect/issues/44
  stage.match {
    selector = "{namespace=\"1password\"}"

    stage.template {
      source   = "level"
      template = "{{ if eq .Value \"1\" }}error{{ else if eq .Value \"2\" }}warn{{ else if eq .Value \"3\" }}info{{ else if eq .Value \"4\" }}debug{{ else if eq .Value \"5\" }}trace{{ else }}{{ .Value }}{{ end }}"
    }
  }

  // Promote extracted JSON fields to labels (empty value = use the extracted
  // field of the same name).
  // NOTE(review): if `caller` carries file:line values it may be
  // high-cardinality as a Loki label - confirm.
  stage.labels {
    values = {
      level      = "",
      caller     = "",
      repository = "",
    }
  }

  // Add cluster label for multi-cluster identification
  stage.static_labels {
    values = {
      cluster = "indri",
    }
  }
}

// Write logs to Loki
loki.write "loki" {
  endpoint {
    url = "http://loki.monitoring.svc.cluster.local:3100/loki/api/v1/push"
  }
}

// ============== SERVICE HEALTH PROBES ==============

// Blackbox-style HTTP probes for k8s services.
// All targets share the single http_2xx module (HTTP prober, 5s timeout).
prometheus.exporter.blackbox "services" {
  config = "{ modules: { http_2xx: { prober: http, timeout: 5s } } }"

  target {
    name    = "miniflux"
    address = "http://miniflux.miniflux.svc.cluster.local:8080/healthcheck"
    module  = "http_2xx"
  }

  target {
    name    = "kiwix"
    address = "http://kiwix.kiwix.svc.cluster.local:80/"
    module  = "http_2xx"
  }

  target {
    name    = "transmission"
    address = "http://transmission.torrent.svc.cluster.local:9091/transmission/web/"
    module  = "http_2xx"
  }

  target {
    // devpi runs natively on indri (LaunchAgent), not in-cluster.
    // We probe through Caddy (https://pypi.ops.eblu.me), which the cluster
    // can reach via Tailscale.
    name    = "devpi"
    address = "https://pypi.ops.eblu.me/+api"
    module  = "http_2xx"
  }

  target {
    name    = "argocd"
    address = "http://argocd-server.argocd.svc.cluster.local:80/healthz"
    module  = "http_2xx"
  }

  target {
    name    = "prometheus"
    address = "http://prometheus.monitoring.svc.cluster.local:9090/-/healthy"
    module  = "http_2xx"
  }

  target {
    name    = "loki"
    address = "http://loki.monitoring.svc.cluster.local:3100/ready"
    module  = "http_2xx"
  }

  target {
    name    = "grafana"
    address = "http://grafana.monitoring.svc.cluster.local:80/api/health"
    module  = "http_2xx"
  }

  target {
    name    = "teslamate"
    address = "http://teslamate.teslamate.svc.cluster.local:4000/"
    module  = "http_2xx"
  }

  target {
    name    = "navidrome"
    address = "http://navidrome.navidrome.svc.cluster.local:4533/"
    module  = "http_2xx"
  }
}

// Scrape blackbox probe results
prometheus.scrape "blackbox" {
  targets         = prometheus.exporter.blackbox.services.targets
  scrape_interval = "30s"
  forward_to      = [prometheus.remote_write.prometheus.receiver]
}

// Push metrics to Prometheus
prometheus.remote_write "prometheus" {
  external_labels = {
    cluster = "indri",
  }

  endpoint {
    url = "http://prometheus.monitoring.svc.cluster.local:9090/api/v1/write"
  }
}