From 508f7a957d4bd195735eda5309055c86a228eb1e Mon Sep 17 00:00:00 2001
From: Erich Blume
Date: Tue, 24 Mar 2026 20:44:56 -0700
Subject: [PATCH] Add Grafana dashboard and Prometheus scraping for snowflake proxy

Bind metrics to 0.0.0.0 so Alloy can scrape from k8s, add HOST_IP
downward API env var to alloy-ringtail DaemonSet, and add a dashboard
with connection rate, traffic rate, country breakdown, and process
memory.

Co-Authored-By: Claude Opus 4.6 (1M context)
---

Review notes (below the "---" separator, so not part of the commit message):

  * nixos/ringtail/configuration.nix now starts the proxy with
    "-metrics-address 0.0.0.0"; per the commit message this binds the
    metrics listener to 0.0.0.0. NOTE(review): confirm port 9999 is not
    reachable from networks that should not see it.
  * Alloy scrapes HOST_IP:9999 at /internal/metrics every 30s and falls
    back to "localhost" when HOST_IP is unset/empty; HOST_IP comes from
    the status.hostIP fieldRef added to the DaemonSet.
  * Dashboard: only the two "Process Memory" queries select
    job="snowflake_proxy"; the tor_snowflake_proxy_* queries carry no
    label matchers.
  * Dashboard: "Total Connections" wraps its counter in sum(); the
    traffic and timeout stat panels query the counter directly.
  * Dashboard: "Connections by Country" plots increase(...[1h]) with a
    legend "sum" calc. NOTE(review): presumably the 1h windows overlap
    whenever the query step is shorter than 1h, so the legend total may
    overstate the connection count -- confirm in Grafana.
  * NOTE(review): leading whitespace of the context lines in the
    daemonset.yaml, kustomization.yaml and configuration.nix hunks was
    reconstructed; verify against the target files before applying.

 argocd/manifests/alloy-ringtail/config.alloy  |  10 +
 .../manifests/alloy-ringtail/daemonset.yaml   |   4 +
 .../dashboards/configmap-snowflake-proxy.yaml | 323 ++++++++++++++++++
 .../grafana-config/kustomization.yaml         |   1 +
 nixos/ringtail/configuration.nix              |   2 +-
 5 files changed, 339 insertions(+), 1 deletion(-)
 create mode 100644 argocd/manifests/grafana-config/dashboards/configmap-snowflake-proxy.yaml

diff --git a/argocd/manifests/alloy-ringtail/config.alloy b/argocd/manifests/alloy-ringtail/config.alloy
index c63b478..e92ab0f 100644
--- a/argocd/manifests/alloy-ringtail/config.alloy
+++ b/argocd/manifests/alloy-ringtail/config.alloy
@@ -27,6 +27,16 @@ prometheus.relabel "instance" {
   }
 }
 
+// ============== SNOWFLAKE PROXY METRICS ==============
+
+// Scrape Tor Snowflake proxy metrics from host (systemd service on port 9999)
+prometheus.scrape "snowflake_proxy" {
+  targets = [{"__address__" = coalesce(sys.env("HOST_IP"), "localhost") + ":9999", "job" = "snowflake_proxy"}]
+  metrics_path = "/internal/metrics"
+  scrape_interval = "30s"
+  forward_to = [prometheus.relabel.instance.receiver]
+}
+
 // ============== KUBE-STATE-METRICS SCRAPE ==============
 
 prometheus.scrape "kube_state_metrics" {
diff --git a/argocd/manifests/alloy-ringtail/daemonset.yaml b/argocd/manifests/alloy-ringtail/daemonset.yaml
index fffd66e..cdd264d 100644
--- a/argocd/manifests/alloy-ringtail/daemonset.yaml
+++ b/argocd/manifests/alloy-ringtail/daemonset.yaml
@@ -33,6 +33,10 @@ spec:
               valueFrom:
                 fieldRef:
                   fieldPath: spec.nodeName
+            - name: HOST_IP
+              valueFrom:
+                fieldRef:
+                  fieldPath: status.hostIP
           resources:
             requests:
               cpu: 50m
diff --git a/argocd/manifests/grafana-config/dashboards/configmap-snowflake-proxy.yaml b/argocd/manifests/grafana-config/dashboards/configmap-snowflake-proxy.yaml
new file mode 100644
index 0000000..089cae3
--- /dev/null
+++ b/argocd/manifests/grafana-config/dashboards/configmap-snowflake-proxy.yaml
@@ -0,0 +1,323 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: grafana-dashboard-snowflake-proxy
+  namespace: monitoring
+  labels:
+    grafana_dashboard: "1"
+data:
+  snowflake-proxy.json: |
+    {
+      "annotations": { "list": [] },
+      "editable": true,
+      "fiscalYearStartMonth": 0,
+      "graphTooltip": 1,
+      "id": null,
+      "links": [],
+      "panels": [
+        {
+          "datasource": { "type": "prometheus", "uid": "prometheus" },
+          "fieldConfig": {
+            "defaults": {
+              "color": { "mode": "thresholds" },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  { "color": "green", "value": null }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": { "h": 4, "w": 6, "x": 0, "y": 0 },
+          "id": 1,
+          "options": {
+            "colorMode": "value",
+            "graphMode": "area",
+            "justifyMode": "auto",
+            "textMode": "auto",
+            "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
+          },
+          "title": "Total Connections",
+          "type": "stat",
+          "targets": [
+            {
+              "expr": "sum(tor_snowflake_proxy_connections_total)",
+              "legendFormat": "connections",
+              "refId": "A"
+            }
+          ]
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "prometheus" },
+          "fieldConfig": {
+            "defaults": {
+              "color": { "mode": "thresholds" },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  { "color": "green", "value": null }
+                ]
+              },
+              "unit": "bytes"
+            },
+            "overrides": []
+          },
+          "gridPos": { "h": 4, "w": 6, "x": 6, "y": 0 },
+          "id": 2,
+          "options": {
+            "colorMode": "value",
+            "graphMode": "area",
+            "justifyMode": "auto",
+            "textMode": "auto",
+            "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
+          },
+          "title": "Total Traffic (Inbound)",
+          "type": "stat",
+          "targets": [
+            {
+              "expr": "tor_snowflake_proxy_traffic_inbound_bytes_total",
+              "legendFormat": "inbound",
+              "refId": "A"
+            }
+          ]
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "prometheus" },
+          "fieldConfig": {
+            "defaults": {
+              "color": { "mode": "thresholds" },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  { "color": "blue", "value": null }
+                ]
+              },
+              "unit": "bytes"
+            },
+            "overrides": []
+          },
+          "gridPos": { "h": 4, "w": 6, "x": 12, "y": 0 },
+          "id": 3,
+          "options": {
+            "colorMode": "value",
+            "graphMode": "area",
+            "justifyMode": "auto",
+            "textMode": "auto",
+            "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
+          },
+          "title": "Total Traffic (Outbound)",
+          "type": "stat",
+          "targets": [
+            {
+              "expr": "tor_snowflake_proxy_traffic_outbound_bytes_total",
+              "legendFormat": "outbound",
+              "refId": "A"
+            }
+          ]
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "prometheus" },
+          "fieldConfig": {
+            "defaults": {
+              "color": { "mode": "thresholds" },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  { "color": "orange", "value": null }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": { "h": 4, "w": 6, "x": 18, "y": 0 },
+          "id": 4,
+          "options": {
+            "colorMode": "value",
+            "graphMode": "area",
+            "justifyMode": "auto",
+            "textMode": "auto",
+            "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
+          },
+          "title": "Connection Timeouts",
+          "type": "stat",
+          "targets": [
+            {
+              "expr": "tor_snowflake_proxy_connection_timeouts_total",
+              "legendFormat": "timeouts",
+              "refId": "A"
+            }
+          ]
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "prometheus" },
+          "fieldConfig": {
+            "defaults": {
+              "color": { "mode": "palette-classic" },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisLabel": "",
+                "drawStyle": "line",
+                "fillOpacity": 20,
+                "lineWidth": 2,
+                "pointSize": 5,
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": { "group": "A", "mode": "none" }
+              },
+              "unit": "cps"
+            },
+            "overrides": []
+          },
+          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 4 },
+          "id": 5,
+          "options": {
+            "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom" },
+            "tooltip": { "mode": "multi", "sort": "desc" }
+          },
+          "title": "Connection Rate",
+          "type": "timeseries",
+          "targets": [
+            {
+              "expr": "rate(tor_snowflake_proxy_connections_total[5m])",
+              "legendFormat": "{{ country }}",
+              "refId": "A"
+            }
+          ]
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "prometheus" },
+          "fieldConfig": {
+            "defaults": {
+              "color": { "mode": "palette-classic" },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisLabel": "",
+                "drawStyle": "line",
+                "fillOpacity": 20,
+                "lineWidth": 2,
+                "pointSize": 5,
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": { "group": "A", "mode": "none" }
+              },
+              "unit": "Bps"
+            },
+            "overrides": []
+          },
+          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 4 },
+          "id": 6,
+          "options": {
+            "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom" },
+            "tooltip": { "mode": "multi", "sort": "desc" }
+          },
+          "title": "Traffic Rate",
+          "type": "timeseries",
+          "targets": [
+            {
+              "expr": "rate(tor_snowflake_proxy_traffic_inbound_bytes_total[5m])",
+              "legendFormat": "inbound",
+              "refId": "A"
+            },
+            {
+              "expr": "rate(tor_snowflake_proxy_traffic_outbound_bytes_total[5m])",
+              "legendFormat": "outbound",
+              "refId": "B"
+            }
+          ]
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "prometheus" },
+          "fieldConfig": {
+            "defaults": {
+              "color": { "mode": "palette-classic" },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisLabel": "",
+                "drawStyle": "bars",
+                "fillOpacity": 80,
+                "lineWidth": 1,
+                "pointSize": 5,
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": { "group": "A", "mode": "normal" }
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 12 },
+          "id": 7,
+          "options": {
+            "legend": { "calcs": ["sum"], "displayMode": "table", "placement": "right", "sortBy": "Total", "sortDesc": true },
+            "tooltip": { "mode": "multi", "sort": "desc" }
+          },
+          "title": "Connections by Country",
+          "type": "timeseries",
+          "targets": [
+            {
+              "expr": "increase(tor_snowflake_proxy_connections_total[1h])",
+              "legendFormat": "{{ country }}",
+              "refId": "A"
+            }
+          ]
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "prometheus" },
+          "fieldConfig": {
+            "defaults": {
+              "color": { "mode": "palette-classic" },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisLabel": "",
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "lineWidth": 2,
+                "pointSize": 5,
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": { "group": "A", "mode": "none" }
+              },
+              "unit": "bytes"
+            },
+            "overrides": []
+          },
+          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 12 },
+          "id": 8,
+          "options": {
+            "legend": { "calcs": ["lastNotNull"], "displayMode": "table", "placement": "bottom" },
+            "tooltip": { "mode": "multi", "sort": "desc" }
+          },
+          "title": "Process Memory",
+          "type": "timeseries",
+          "targets": [
+            {
+              "expr": "process_resident_memory_bytes{job=\"snowflake_proxy\"}",
+              "legendFormat": "RSS",
+              "refId": "A"
+            },
+            {
+              "expr": "process_virtual_memory_bytes{job=\"snowflake_proxy\"}",
+              "legendFormat": "Virtual",
+              "refId": "B"
+            }
+          ]
+        }
+      ],
+      "schemaVersion": 39,
+      "tags": ["snowflake", "tor", "anti-censorship"],
+      "templating": { "list": [] },
+      "time": { "from": "now-24h", "to": "now" },
+      "timepicker": {},
+      "timezone": "browser",
+      "title": "Snowflake Proxy",
+      "uid": "snowflake-proxy",
+      "version": 1
+    }
diff --git a/argocd/manifests/grafana-config/kustomization.yaml b/argocd/manifests/grafana-config/kustomization.yaml
index 6412e8b..a6e8000 100644
--- a/argocd/manifests/grafana-config/kustomization.yaml
+++ b/argocd/manifests/grafana-config/kustomization.yaml
@@ -27,6 +27,7 @@ resources:
   - dashboards/configmap-forgejo.yaml
   - dashboards/configmap-tempo.yaml
   - dashboards/configmap-alerts.yaml
+  - dashboards/configmap-snowflake-proxy.yaml
   # TeslaMate dashboards are fetched by the init-teslamate-dashboards init
   # container in the Grafana deployment, sourced from mirrors/teslamate on forge.
   # See argocd/manifests/grafana/deployment.yaml for the version pin.
diff --git a/nixos/ringtail/configuration.nix b/nixos/ringtail/configuration.nix
index c4b8919..6e18d0c 100644
--- a/nixos/ringtail/configuration.nix
+++ b/nixos/ringtail/configuration.nix
@@ -499,7 +499,7 @@ in
       wants = [ "network-online.target" ];
       wantedBy = [ "multi-user.target" ];
       serviceConfig = {
-        ExecStart = "${pkgs.snowflake}/bin/proxy -metrics";
+        ExecStart = "${pkgs.snowflake}/bin/proxy -metrics -metrics-address 0.0.0.0";
         DynamicUser = true;
         Restart = "always";
         RestartSec = 10;