From d7af0048426548a9c8a50761282cc4d64192df0d Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Fri, 17 Apr 2026 15:05:59 -0700 Subject: [PATCH] Add Forgejo metrics + upstream latency histogram to Fly proxy dashboard - Enable Forgejo /metrics endpoint (app.ini [metrics] section) - Add Alloy scrape target for Forgejo metrics on indri - Add upstream_response_time histogram to Fly proxy Alloy config - Replace single p95 panel with p50/p90/p99 + upstream breakdown filtered to forge.eblu.me host Co-Authored-By: Claude Opus 4.6 (1M context) --- ansible/roles/alloy/defaults/main.yml | 4 ++ ansible/roles/alloy/templates/config.alloy.j2 | 12 +++++ ansible/roles/forgejo/templates/app.ini.j2 | 5 ++ .../dashboards/configmap-flyio.yaml | 52 ++++++++++++++++++- fly/alloy.river | 9 ++++ 5 files changed, 80 insertions(+), 2 deletions(-) diff --git a/ansible/roles/alloy/defaults/main.yml b/ansible/roles/alloy/defaults/main.yml index fa840d4..4cf7432 100644 --- a/ansible/roles/alloy/defaults/main.yml +++ b/ansible/roles/alloy/defaults/main.yml @@ -101,6 +101,10 @@ alloy_op_vault: vg6xf6vvfmoh5hqjjhlhbeoaie alloy_op_postgres_item: guxu3j7ajhjyey6xxl2ovsl2ui alloy_op_postgres_field: alloy-user-pw +# Forgejo metrics collection +alloy_collect_forgejo: true +alloy_forgejo_port: 3001 + # macOS power metrics collection (via powermetrics, requires root) alloy_collect_power_metrics: true alloy_power_metrics_script: /usr/local/bin/macos-power-metrics diff --git a/ansible/roles/alloy/templates/config.alloy.j2 b/ansible/roles/alloy/templates/config.alloy.j2 index 51d2c94..39e4dad 100644 --- a/ansible/roles/alloy/templates/config.alloy.j2 +++ b/ansible/roles/alloy/templates/config.alloy.j2 @@ -74,6 +74,18 @@ prometheus.scrape "zot" { } {% endif %} +{% if alloy_collect_forgejo | default(false) %} +// ============== FORGEJO METRICS ============== + +// Scrape Forgejo's native metrics endpoint +prometheus.scrape "forgejo" { + targets = [{"__address__" = "localhost:{{ alloy_forgejo_port }}"}] + metrics_path = "/metrics" + forward_to = [prometheus.relabel.instance.receiver] + scrape_interval = "{{ alloy_scrape_interval }}" +} +{% endif %} + {% if alloy_collect_logs %} // ============== LOG COLLECTION ============== diff --git a/ansible/roles/forgejo/templates/app.ini.j2 b/ansible/roles/forgejo/templates/app.ini.j2 index fe3de38..9c5b4d5 100644 --- a/ansible/roles/forgejo/templates/app.ini.j2 +++ b/ansible/roles/forgejo/templates/app.ini.j2 @@ -95,6 +95,11 @@ ACCOUNT_LINKING = login USERNAME = nickname REGISTER_EMAIL_CONFIRM = false +[metrics] +ENABLED = true +ENABLED_ISSUE_BY_LABEL = false +ENABLED_ISSUE_BY_REPOSITORY = false + [actions] ENABLED = {{ forgejo_actions_enabled | lower }} DEFAULT_ACTIONS_URL = {{ forgejo_actions_default_url }} diff --git a/argocd/manifests/grafana-config/dashboards/configmap-flyio.yaml b/argocd/manifests/grafana-config/dashboards/configmap-flyio.yaml index 981f7ea..8d97918 100644 --- a/argocd/manifests/grafana-config/dashboards/configmap-flyio.yaml +++ b/argocd/manifests/grafana-config/dashboards/configmap-flyio.yaml @@ -249,9 +249,57 @@ data: "tooltip": { "mode": "multi", "sort": "desc" } }, "targets": [ - { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "histogram_quantile(0.95, sum by (le) (rate(flyio_nginx_http_request_duration_seconds_bucket{instance=\"flyio-proxy\"}[5m])))", "legendFormat": "p95", "refId": "A" } + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "histogram_quantile(0.50, sum by (le) (rate(flyio_nginx_http_request_duration_seconds_bucket{instance=\"flyio-proxy\",host=\"forge.eblu.me\"}[5m])))", "legendFormat": "p50", "refId": "A" }, + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "histogram_quantile(0.90, sum by (le) (rate(flyio_nginx_http_request_duration_seconds_bucket{instance=\"flyio-proxy\",host=\"forge.eblu.me\"}[5m])))", "legendFormat": "p90", "refId": "B" }, + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "histogram_quantile(0.99, sum by (le) (rate(flyio_nginx_http_request_duration_seconds_bucket{instance=\"flyio-proxy\",host=\"forge.eblu.me\"}[5m])))", "legendFormat": "p99", "refId": "C" } ], - "title": "Upstream Response Time p95", + "title": "Proxy: Latency Percentiles", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "seconds", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 24, "x": 0, "y": 22 }, + "id": 8, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "right", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "histogram_quantile(0.50, sum by (le) (rate(flyio_nginx_upstream_response_time_seconds_bucket{instance=\"flyio-proxy\",host=\"forge.eblu.me\"}[5m])))", "legendFormat": "upstream p50", "refId": "A" }, + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "histogram_quantile(0.90, sum by (le) (rate(flyio_nginx_upstream_response_time_seconds_bucket{instance=\"flyio-proxy\",host=\"forge.eblu.me\"}[5m])))", "legendFormat": "upstream p90", "refId": "B" }, + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "histogram_quantile(0.99, sum by (le) (rate(flyio_nginx_upstream_response_time_seconds_bucket{instance=\"flyio-proxy\",host=\"forge.eblu.me\"}[5m])))", "legendFormat": "upstream p99", "refId": "C" } + ], + "title": "Forgejo: Upstream Response Time", "type": "timeseries" } ], diff --git a/fly/alloy.river b/fly/alloy.river index c504247..015583c 100644 --- a/fly/alloy.river +++ b/fly/alloy.river @@ -65,6 +65,15 @@ loki.process "nginx" { } } + stage.metrics { + metric.histogram { + name = "flyio_nginx_upstream_response_time_seconds" + description = "Upstream (Forgejo) response time in seconds, excluding proxy overhead." + source = "upstream_response_time" + buckets = [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 15, 20, 30, 45, 60] + } + } + stage.metrics { metric.counter { name = "flyio_nginx_http_response_bytes_total"