// blumeops/argocd/manifests/alloy-k8s/config.alloy

// Alloy k8s configuration - collects pod logs from all namespaces

// ============== K8S POD LOG DISCOVERY ==============

// Discover all pods in the cluster.
// No namespace or selector restriction is configured on this component, so
// filtering is left entirely to the relabel rules that consume its targets.
discovery.kubernetes "pods" {
  // Pod-role discovery: supplies the __meta_kubernetes_pod_* labels that the
  // relabel rules in this file read.
  role = "pod"
}

// Relabel discovered pod targets into the label set attached to log streams.
// Rules are applied top to bottom, so a later rule can overwrite a label that
// an earlier rule set.
discovery.relabel "pods" {
  targets = discovery.kubernetes.pods.targets

  // Keep only running pods: drop targets whose phase is any other value.
  rule {
    source_labels = ["__meta_kubernetes_pod_phase"]
    regex         = "Pending|Succeeded|Failed|Unknown"
    action        = "drop"
  }

  // Set namespace label
  rule {
    source_labels = ["__meta_kubernetes_namespace"]
    target_label  = "namespace"
  }

  // Set pod name label
  rule {
    source_labels = ["__meta_kubernetes_pod_name"]
    target_label  = "pod"
  }

  // Set container name label
  rule {
    source_labels = ["__meta_kubernetes_pod_container_name"]
    target_label  = "container"
  }

  // App label, step 1 of 2: seed "app" from app.kubernetes.io/name.
  // This is the fallback value; with the default regex an empty source value
  // yields an empty replacement, which leaves the target without an "app" label.
  rule {
    source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"]
    target_label  = "app"
  }

  // App label, step 2 of 2: a non-empty plain "app" pod label takes precedence.
  // The "(.+)" regex makes this rule a no-op when the pod has no "app" label,
  // so the fallback from step 1 survives. (Previously the fallback rule ran
  // last with "(.+)" and therefore overrode the "app" label instead of only
  // filling it in when missing.)
  rule {
    source_labels = ["__meta_kubernetes_pod_label_app"]
    target_label  = "app"
    regex         = "(.+)"
    action        = "replace"
  }

  // Set node name
  rule {
    source_labels = ["__meta_kubernetes_pod_node_name"]
    target_label  = "node"
  }

  // Build the log path for the pod container.
  // NOTE(review): the only consumer of these targets in this file is
  // loki.source.kubernetes, which tails logs through the Kubernetes API and,
  // per its documentation, does not read __path__ — this rule is likely
  // vestigial from a file-tailing setup. Also, with the default "(.*)" regex
  // there is a single capture group, so "$2" expands to nothing and "$1" is
  // the joined "<uid>/<container>" value. Confirm before relying on this path.
  rule {
    source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"]
    target_label  = "__path__"
    separator     = "/"
    replacement   = "/var/log/pods/*$1/$2/*.log"
  }
}

// Collect container logs for every relabeled pod target and hand each entry
// to the processing pipeline.
loki.source.kubernetes "pods" {
  forward_to = [loki.process.pods.receiver]
  targets    = discovery.relabel.pods.output
}

// Log processing pipeline for pod logs. Stages run in the order written:
// drop noise -> parse JSON -> clear parse-error labels -> normalize
// 1password levels -> promote extracted fields to labels -> tag cluster.
loki.process "pods" {
  forward_to = [loki.write.loki.receiver]

  // Silence the noisy deprecation warning emitted by the minikube
  // storage-provisioner (https://github.com/kubernetes/minikube/issues/21009).
  // The stage is not wrapped in a selector, so the expression is checked
  // against lines from every stream passing through this pipeline.
  stage.drop {
    expression = "v1 Endpoints is deprecated"
    source     = ""
  }

  // Attempt to parse each line as JSON (structured application logs).
  // Both "msg" (common) and "message" (zot) are extracted so either field
  // name is available in the extracted data.
  stage.json {
    expressions = {
      caller     = "caller",
      level      = "level",
      message    = "message",
      msg        = "msg",
      repository = "repository",
      time       = "time",
    }
  }

  // Plain-text lines are expected and fine; they just have no extracted
  // fields. Remove the labels that JSON parse failures would leave behind.
  stage.label_drop {
    values = ["__error__", "__error_details__"]
  }

  // 1password-connect logs numeric levels; map them to names
  // (1=error, 2=warn, 3=info, 4=debug, 5=trace) and pass any other value
  // through untouched. The selector limits this to the 1password namespace.
  // See: https://github.com/1Password/connect/issues/44
  stage.match {
    selector = "{namespace=\"1password\"}"

    stage.template {
      source   = "level"
      template = "{{ if eq .Value \"1\" }}error{{ else if eq .Value \"2\" }}warn{{ else if eq .Value \"3\" }}info{{ else if eq .Value \"4\" }}debug{{ else if eq .Value \"5\" }}trace{{ else }}{{ .Value }}{{ end }}"
    }
  }

  // Promote selected extracted fields to stream labels. An empty value means
  // the label is filled from the extracted field of the same name.
  stage.labels {
    values = {
      caller     = "",
      level      = "",
      repository = "",
    }
  }

  // Tag every entry with the cluster it came from, for multi-cluster setups.
  stage.static_labels {
    values = {
      cluster = "indri",
    }
  }
}

// Write logs to Loki.
// Receives the processed entries from loki.process.pods and pushes them to a
// single endpoint.
loki.write "loki" {
  endpoint {
    // Push API of the "loki" service in the "monitoring" namespace.
    url = "http://loki.monitoring.svc.cluster.local:3100/loki/api/v1/push"
  }
}

// ============== SERVICE HEALTH PROBES ==============

// HTTP health probes for services, blackbox-exporter style. Every target uses
// the single "http_2xx" module defined inline below (HTTP prober, 5s timeout).
prometheus.exporter.blackbox "services" {
  config = "{ modules: { http_2xx: { prober: http, timeout: 5s } } }"

  // ---- In-cluster services, probed via their cluster-local DNS names ----

  target {
    name    = "miniflux"
    module  = "http_2xx"
    address = "http://miniflux.miniflux.svc.cluster.local:8080/healthcheck"
  }

  target {
    name    = "kiwix"
    module  = "http_2xx"
    address = "http://kiwix.kiwix.svc.cluster.local:80/"
  }

  target {
    name    = "transmission"
    module  = "http_2xx"
    address = "http://transmission.torrent.svc.cluster.local:9091/transmission/web/"
  }

  target {
    name    = "argocd"
    module  = "http_2xx"
    address = "http://argocd-server.argocd.svc.cluster.local:80/healthz"
  }

  target {
    name    = "prometheus"
    module  = "http_2xx"
    address = "http://prometheus.monitoring.svc.cluster.local:9090/-/healthy"
  }

  target {
    name    = "loki"
    module  = "http_2xx"
    address = "http://loki.monitoring.svc.cluster.local:3100/ready"
  }

  target {
    name    = "grafana"
    module  = "http_2xx"
    address = "http://grafana.monitoring.svc.cluster.local:80/api/health"
  }

  target {
    name    = "navidrome"
    module  = "http_2xx"
    address = "http://navidrome.navidrome.svc.cluster.local:4533/"
  }

  // ---- Services outside this cluster, probed through Caddy over Tailscale ----

  // devpi runs natively on indri (LaunchAgent), not in-cluster, so it is
  // probed at its Caddy front end (https://pypi.ops.eblu.me), which the
  // cluster can reach via Tailscale.
  target {
    name    = "devpi"
    module  = "http_2xx"
    address = "https://pypi.ops.eblu.me/+api"
  }

  // teslamate was migrated to ringtail (wave-1).
  target {
    name    = "teslamate"
    module  = "http_2xx"
    address = "https://tesla.ops.eblu.me/"
  }
}

// Run the blackbox probes every 30 seconds by scraping the exporter's
// targets, and forward the resulting metrics to the remote-write component.
prometheus.scrape "blackbox" {
  scrape_interval = "30s"
  targets         = prometheus.exporter.blackbox.services.targets
  forward_to      = [prometheus.remote_write.prometheus.receiver]
}

// Ship scraped metrics to Prometheus over remote write, labeling them with
// the originating cluster.
prometheus.remote_write "prometheus" {
  endpoint {
    url = "http://prometheus.monitoring.svc.cluster.local:9090/api/v1/write"
  }

  external_labels = {
    cluster = "indri",
  }
}