From b5551e227e6c25599d53516478a6192de1d97c2c Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Mon, 13 Apr 2026 08:27:12 -0700 Subject: [PATCH] Route Dagger build telemetry to Tempo The Dagger engine's internal OTLP proxy returns 500 on /v1/metrics when there's no real backend, causing ~9s retry warnings per pipeline step. Point OTEL_EXPORTER_OTLP_ENDPOINT at Tempo to give it a real endpoint. Also removes the stale os.environ workaround from main.py (the SDK initializes telemetry before our module loads, so it had no effect). Co-Authored-By: Claude Opus 4.6 (1M context) --- .forgejo/workflows/build-container.yaml | 5 +++++ docs/changelog.d/+dagger-otel-metrics-fix.bugfix.md | 2 +- src/blumeops/main.py | 8 -------- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/.forgejo/workflows/build-container.yaml b/.forgejo/workflows/build-container.yaml index efa9007..78ee586 100644 --- a/.forgejo/workflows/build-container.yaml +++ b/.forgejo/workflows/build-container.yaml @@ -73,6 +73,11 @@ jobs: needs: detect if: needs.detect.outputs.dagger != '[]' runs-on: k8s + env: + # Send Dagger OTLP telemetry to Tempo. Without a real backend the + # engine's internal proxy returns 500 on /v1/metrics, causing noisy + # retry warnings in every build. + OTEL_EXPORTER_OTLP_ENDPOINT: http://tempo.tracing.svc.cluster.local:4318 strategy: matrix: container: ${{ fromJson(needs.detect.outputs.dagger) }} diff --git a/docs/changelog.d/+dagger-otel-metrics-fix.bugfix.md b/docs/changelog.d/+dagger-otel-metrics-fix.bugfix.md index 6c02765..85475c2 100644 --- a/docs/changelog.d/+dagger-otel-metrics-fix.bugfix.md +++ b/docs/changelog.d/+dagger-otel-metrics-fix.bugfix.md @@ -1 +1 @@ -Fix OTEL metrics exporter warnings in Dagger builds by hard-overriding the env var before SDK init. +Route Dagger build telemetry to Tempo, fixing OTEL metrics exporter warnings. diff --git a/src/blumeops/main.py b/src/blumeops/main.py index 9f60fd4..94b932b 100644 --- a/src/blumeops/main.py +++ b/src/blumeops/main.py @@ -1,13 +1,5 @@ -import os from pathlib import Path -# Force-disable OTLP metrics exporter before the Dagger SDK initializes -# OpenTelemetry. The engine shim may set OTEL_METRICS_EXPORTER=otlp before -# our module loads, so setdefault won't work — we need a hard override. -# Without this, the engine's local OTLP endpoint returns 500s on metrics, -# causing ~9s retry cycles per pipeline step. -os.environ["OTEL_METRICS_EXPORTER"] = "none" - import dagger from dagger import dag, function, object_type