## Summary
Adds the third observability pillar — **distributed tracing** — alongside existing metrics (Prometheus) and logs (Loki).
- **Grafana Tempo 2.10.1** on minikube-indri for trace storage, with 7d retention, OTLP receivers, and a `metrics_generator` that remote-writes span metrics (RED) to Prometheus
- **Beyla eBPF auto-instrumentation** via a privileged Alloy DaemonSet on ringtail — instruments HTTP services (Frigate, ntfy, Ollama, Immich) without code changes
- **Grafana integration** — Tempo datasource with trace↔log and trace↔metrics correlation, plus Loki derivedFields for trace ID linking
- **Prometheus** scrapes Tempo operational metrics
### Architecture
```
ringtail (k3s)                               indri (minikube)
┌──────────────────────┐                     ┌─────────────────────┐
│ Alloy+Beyla (eBPF)   │──OTLP HTTP────────→ │ Tempo               │
│  ↳ Frigate, ntfy,    │   via tailnet       │  ↳ trace storage    │
│    Ollama, Immich    │                     │  ↳ RED → Prometheus │
└──────────────────────┘                     │                     │
                                             │ Grafana             │
                                             │  ↳ Tempo datasource │
                                             └─────────────────────┘
```
### New files (12)
- `docs/reference/services/tempo.md` — reference doc
- `docs/changelog.d/feature-otel-tracing.feature.md`
- `argocd/apps/tempo.yaml` + `argocd/manifests/tempo/` (6 files)
- `argocd/apps/alloy-tracing-ringtail.yaml` + `argocd/manifests/alloy-tracing-ringtail/` (4 files)
### Modified files (6)
- `argocd/manifests/grafana/datasources.yaml` — Tempo datasource + Loki derivedFields
- `argocd/manifests/prometheus/prometheus.yml` — Tempo scrape target
- `service-versions.yaml` — tempo + alloy-tracing-ringtail entries
- `docs/reference/services/grafana.md` — Tempo in datasources table
- `docs/reference/reference.md` — Tempo in services index
- `docs/reference/operations/observability.md` — Tempo in components list
## Deployment and Testing
- [ ] Sync `apps` app to pick up new Application definitions
- [ ] `argocd app set tempo --revision feature/otel-tracing && argocd app sync tempo`
- [ ] Verify Tempo pod: `kubectl --context=minikube-indri get pods -n monitoring -l app=tempo`
- [ ] Verify Tempo ready: port-forward 3200 and `curl localhost:3200/ready`
- [ ] Verify Tailscale ingresses: `kubectl --context=minikube-indri get ingress -n monitoring`
- [ ] `argocd app set alloy-tracing-ringtail --revision feature/otel-tracing && argocd app sync alloy-tracing-ringtail`
- [ ] Check Beyla discovery in alloy-tracing logs on ringtail
- [ ] Sync grafana-config for updated datasources
- [ ] Sync prometheus for updated scrape config
- [ ] Test Grafana Tempo datasource connection
- [ ] Generate test traffic and search traces in Grafana Explore → Tempo
- [ ] After merge: reset all ArgoCD app revisions back to main
Reviewed-on: #286
491 lines
15 KiB
YAML
491 lines
15 KiB
YAML
apiVersion: v1
|
|
kind: ConfigMap
|
|
metadata:
|
|
name: grafana-dashboard-tempo
|
|
namespace: monitoring
|
|
labels:
|
|
grafana_dashboard: "1"
|
|
data:
|
|
tempo.json: |
|
|
{
|
|
"annotations": {
|
|
"list": []
|
|
},
|
|
"editable": true,
|
|
"fiscalYearStartMonth": 0,
|
|
"graphTooltip": 0,
|
|
"id": null,
|
|
"links": [],
|
|
"panels": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "prometheus"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{ "color": "green", "value": null },
|
|
{ "color": "yellow", "value": 5368709120 },
|
|
{ "color": "red", "value": 8589934592 }
|
|
]
|
|
},
|
|
"unit": "bytes"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": { "h": 4, "w": 6, "x": 0, "y": 0 },
|
|
"id": 1,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "none",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": ["lastNotNull"],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "auto"
|
|
},
|
|
"pluginVersion": "10.0.0",
|
|
"targets": [
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
|
"expr": "sum(tempodb_backend_bytes_total{job=\"tempo\"})",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Storage Used",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "prometheus"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{ "color": "green", "value": null },
|
|
{ "color": "yellow", "value": 50 },
|
|
{ "color": "red", "value": 80 }
|
|
]
|
|
},
|
|
"unit": "percent",
|
|
"max": 100,
|
|
"min": 0
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": { "h": 4, "w": 6, "x": 6, "y": 0 },
|
|
"id": 2,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "none",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": ["lastNotNull"],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "auto"
|
|
},
|
|
"pluginVersion": "10.0.0",
|
|
"targets": [
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
|
"expr": "sum(tempodb_backend_bytes_total{job=\"tempo\"}) / 10737418240 * 100",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "PVC Utilization (of 10Gi)",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "prometheus"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [{ "color": "green", "value": null }]
|
|
},
|
|
"unit": "short"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": { "h": 4, "w": 6, "x": 12, "y": 0 },
|
|
"id": 3,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "none",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": ["lastNotNull"],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "auto"
|
|
},
|
|
"pluginVersion": "10.0.0",
|
|
"targets": [
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
|
"expr": "sum(tempodb_blocklist_length{job=\"tempo\"})",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Total Blocks",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "prometheus"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{ "color": "green", "value": null },
|
|
{ "color": "yellow", "value": 0.5 },
|
|
{ "color": "red", "value": 0.9 }
|
|
]
|
|
},
|
|
"unit": "percentunit"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": { "h": 4, "w": 6, "x": 18, "y": 0 },
|
|
"id": 4,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "none",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": ["lastNotNull"],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "auto"
|
|
},
|
|
"pluginVersion": "10.0.0",
|
|
"targets": [
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
|
"expr": "1 - (go_memstats_heap_idle_bytes{job=\"tempo\"} / go_memstats_heap_sys_bytes{job=\"tempo\"})",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Heap Usage",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "prometheus"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 10,
|
|
"gradientMode": "none",
|
|
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": { "type": "linear" },
|
|
"showPoints": "never",
|
|
"spanNulls": false,
|
|
"stacking": { "group": "A", "mode": "none" },
|
|
"thresholdsStyle": { "mode": "off" }
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [{ "color": "green", "value": null }]
|
|
},
|
|
"unit": "bytes"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 4 },
|
|
"id": 5,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": ["lastNotNull"],
|
|
"displayMode": "table",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": { "mode": "multi", "sort": "desc" }
|
|
},
|
|
"pluginVersion": "10.0.0",
|
|
"targets": [
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
|
"expr": "sum(tempodb_backend_bytes_total{job=\"tempo\"})",
|
|
"legendFormat": "Backend Storage",
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
|
"expr": "go_memstats_heap_inuse_bytes{job=\"tempo\"}",
|
|
"legendFormat": "Heap In Use",
|
|
"refId": "B"
|
|
}
|
|
],
|
|
"title": "Storage and Heap Over Time",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "prometheus"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 10,
|
|
"gradientMode": "none",
|
|
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": { "type": "linear" },
|
|
"showPoints": "never",
|
|
"spanNulls": false,
|
|
"stacking": { "group": "A", "mode": "none" },
|
|
"thresholdsStyle": { "mode": "off" }
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [{ "color": "green", "value": null }]
|
|
},
|
|
"unit": "short"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 4 },
|
|
"id": 6,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": ["mean", "max"],
|
|
"displayMode": "table",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": { "mode": "multi", "sort": "desc" }
|
|
},
|
|
"pluginVersion": "10.0.0",
|
|
"targets": [
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
|
"expr": "rate(tempo_distributor_spans_received_total{job=\"tempo\"}[5m])",
|
|
"legendFormat": "Spans/sec",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Span Ingestion Rate",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "prometheus"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 10,
|
|
"gradientMode": "none",
|
|
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": { "type": "linear" },
|
|
"showPoints": "never",
|
|
"spanNulls": false,
|
|
"stacking": { "group": "A", "mode": "none" },
|
|
"thresholdsStyle": { "mode": "off" }
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [{ "color": "green", "value": null }]
|
|
},
|
|
"unit": "Bps"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 12 },
|
|
"id": 7,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": ["mean", "max"],
|
|
"displayMode": "table",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": { "mode": "multi", "sort": "desc" }
|
|
},
|
|
"pluginVersion": "10.0.0",
|
|
"targets": [
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
|
"expr": "rate(tempo_distributor_bytes_received_total{job=\"tempo\"}[5m])",
|
|
"legendFormat": "Bytes Received",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Ingestion Throughput",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "prometheus"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 10,
|
|
"gradientMode": "none",
|
|
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": { "type": "linear" },
|
|
"showPoints": "never",
|
|
"spanNulls": false,
|
|
"stacking": { "group": "A", "mode": "none" },
|
|
"thresholdsStyle": { "mode": "off" }
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [{ "color": "green", "value": null }]
|
|
},
|
|
"unit": "s"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 12 },
|
|
"id": 8,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": ["mean", "max"],
|
|
"displayMode": "table",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": { "mode": "multi", "sort": "desc" }
|
|
},
|
|
"pluginVersion": "10.0.0",
|
|
"targets": [
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
|
"expr": "histogram_quantile(0.95, sum(rate(tempo_query_frontend_result_metrics_duration_seconds_bucket{job=\"tempo\"}[5m])) by (le))",
|
|
"legendFormat": "p95",
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
|
"expr": "histogram_quantile(0.50, sum(rate(tempo_query_frontend_result_metrics_duration_seconds_bucket{job=\"tempo\"}[5m])) by (le))",
|
|
"legendFormat": "p50",
|
|
"refId": "B"
|
|
}
|
|
],
|
|
"title": "Query Latency",
|
|
"type": "timeseries"
|
|
}
|
|
],
|
|
"refresh": "1m",
|
|
"schemaVersion": 38,
|
|
"tags": ["tempo", "tracing"],
|
|
"templating": {
|
|
"list": []
|
|
},
|
|
"time": {
|
|
"from": "now-24h",
|
|
"to": "now"
|
|
},
|
|
"timepicker": {},
|
|
"timezone": "",
|
|
"title": "Tempo",
|
|
"uid": "tempo-homelab",
|
|
"version": 1,
|
|
"weekStart": ""
|
|
}
|