diff --git a/ansible/roles/alloy/defaults/main.yml b/ansible/roles/alloy/defaults/main.yml
index 85f420c..afb14e7 100644
--- a/ansible/roles/alloy/defaults/main.yml
+++ b/ansible/roles/alloy/defaults/main.yml
@@ -96,3 +96,8 @@ alloy_postgres_database: postgres
 alloy_op_vault: vg6xf6vvfmoh5hqjjhlhbeoaie
 alloy_op_postgres_item: guxu3j7ajhjyey6xxl2ovsl2ui
 alloy_op_postgres_field: alloy-user-pw
+
+# macOS power metrics collection (via powermetrics, requires root)
+alloy_collect_power_metrics: true
+alloy_power_metrics_script: /usr/local/bin/macos-power-metrics
+alloy_power_metrics_interval: 30  # seconds between collection
diff --git a/ansible/roles/alloy/handlers/main.yml b/ansible/roles/alloy/handlers/main.yml
index 4132dfb..325a024 100644
--- a/ansible/roles/alloy/handlers/main.yml
+++ b/ansible/roles/alloy/handlers/main.yml
@@ -4,3 +4,11 @@
     launchctl unload ~/Library/LaunchAgents/mcquack.eblume.alloy.plist 2>/dev/null || true
     launchctl load ~/Library/LaunchAgents/mcquack.eblume.alloy.plist
   changed_when: true
+
+# The power-metrics job is a system LaunchDaemon, so (un)loading it runs as root.
+- name: Reload macos-power-metrics
+  ansible.builtin.shell: |
+    launchctl unload /Library/LaunchDaemons/mcquack.eblume.macos-power-metrics.plist 2>/dev/null || true
+    launchctl load /Library/LaunchDaemons/mcquack.eblume.macos-power-metrics.plist
+  become: true
+  changed_when: true
diff --git a/ansible/roles/alloy/tasks/main.yml b/ansible/roles/alloy/tasks/main.yml
index 99d256d..a97db7b 100644
--- a/ansible/roles/alloy/tasks/main.yml
+++ b/ansible/roles/alloy/tasks/main.yml
@@ -93,3 +93,46 @@
   when: alloy_launchctl_check.rc != 0
   changed_when: true
   failed_when: false
+
+# === macOS Power Metrics (requires root) ===
+
+- name: Deploy macos-power-metrics script
+  ansible.builtin.template:
+    src: macos-power-metrics.sh.j2
+    dest: "{{ alloy_power_metrics_script }}"
+    owner: root
+    group: wheel
+    mode: '0755'
+  become: true
+  notify: Reload macos-power-metrics
+  when: alloy_collect_power_metrics | default(false)
+
+- name: Deploy macos-power-metrics LaunchDaemon plist
+  ansible.builtin.template:
+    src: macos-power-metrics.plist.j2
+    dest: /Library/LaunchDaemons/mcquack.eblume.macos-power-metrics.plist
+    owner: root
+    group: wheel
+    mode: '0644'
+  become: true
+  notify: Reload macos-power-metrics
+  when: alloy_collect_power_metrics | default(false)
+
+# Query as root: otherwise launchctl inspects the calling user's domain, never
+# finds the system daemon, and the load task below re-runs on every play.
+- name: Check if macos-power-metrics LaunchDaemon is loaded
+  ansible.builtin.command: launchctl list mcquack.eblume.macos-power-metrics
+  register: alloy_power_metrics_launchctl_check
+  become: true
+  changed_when: false
+  failed_when: false
+  when: alloy_collect_power_metrics | default(false)
+
+- name: Load macos-power-metrics LaunchDaemon if not loaded
+  ansible.builtin.command: launchctl load /Library/LaunchDaemons/mcquack.eblume.macos-power-metrics.plist
+  become: true
+  when:
+    - alloy_collect_power_metrics | default(false)
+    - alloy_power_metrics_launchctl_check.rc != 0
+  changed_when: true
+  failed_when: false
diff --git a/ansible/roles/alloy/templates/macos-power-metrics.plist.j2 b/ansible/roles/alloy/templates/macos-power-metrics.plist.j2
new file mode 100644
index 0000000..34aab1a
--- /dev/null
+++ b/ansible/roles/alloy/templates/macos-power-metrics.plist.j2
@@ -0,0 +1,21 @@
+
+
+
+
+
+ Label
+ mcquack.eblume.macos-power-metrics
+ ProgramArguments
+
+ {{ alloy_power_metrics_script }}
+
+ StartInterval
+ {{ alloy_power_metrics_interval }}
+ RunAtLoad
+
+ StandardErrorPath
+ /var/log/mcquack.macos-power-metrics.err.log
+ StandardOutPath
+ /var/log/mcquack.macos-power-metrics.out.log
+
+
diff --git a/ansible/roles/alloy/templates/macos-power-metrics.sh.j2 b/ansible/roles/alloy/templates/macos-power-metrics.sh.j2
new file mode 100644
index 0000000..ca9008b
--- /dev/null
+++ b/ansible/roles/alloy/templates/macos-power-metrics.sh.j2
@@ -0,0 +1,79 @@
+#!/bin/bash
+# {{ ansible_managed }}
+# Collects macOS power and thermal metrics for node_exporter textfile collector
+# Requires root to run powermetrics
+#
+# Each run takes one powermetrics sample and atomically replaces
+# macos_power.prom. If powermetrics yields no output, every metric is
+# written as 0.
+
+set -euo pipefail
+
+OUTPUT_FILE="{{ alloy_textfile_dir }}/macos_power.prom"
+TEMP_FILE="${OUTPUT_FILE}.tmp"
+
+# Print, in watts, the reading from the first powermetrics line that starts
+# with the label in $1, or 0 when no line matches. The mW value is taken as
+# the next-to-last field because the combined line is expected to look like
+#   Combined Power (CPU + GPU + ANE): 49 mW
+# (verify against powermetrics on the target macOS release).
+# A here-string feeds awk so its early exit cannot trip pipefail via SIGPIPE.
+power_watts() {
+  awk -v label="$1" '
+    index($0, label) == 1 { printf "%.3f\n", $(NF - 1) / 1000; found = 1; exit }
+    END { if (!found) print 0 }
+  ' <<< "$POWER_OUTPUT"
+}
+
+# Values reported when powermetrics yields no output
+CPU_POWER=0
+GPU_POWER=0
+ANE_POWER=0
+COMBINED_POWER=0
+THERMAL_VALUE=0
+
+# Run powermetrics for one sample
+# NOTE(review): -i is believed to be in milliseconds (1 ms sample) - confirm intended
+POWER_OUTPUT=$(/usr/bin/powermetrics --samplers cpu_power,thermal -n 1 -i 1 2>/dev/null || echo "")
+
+if [ -z "$POWER_OUTPUT" ]; then
+  echo "powermetrics produced no output; writing zeros" >&2
+else
+  # Parse power values (reported in mW, converted to W)
+  CPU_POWER=$(power_watts "CPU Power:")
+  GPU_POWER=$(power_watts "GPU Power:")
+  ANE_POWER=$(power_watts "ANE Power:")
+  COMBINED_POWER=$(power_watts "Combined Power")
+
+  # Parse thermal pressure level
+  THERMAL_LEVEL=$(echo "$POWER_OUTPUT" | grep "Current pressure level:" | awk '{print $4}' || echo "Nominal")
+  case "$THERMAL_LEVEL" in
+    Nominal)  THERMAL_VALUE=0 ;;
+    Moderate) THERMAL_VALUE=1 ;;
+    Heavy)    THERMAL_VALUE=2 ;;
+    Critical) THERMAL_VALUE=3 ;;
+    *)        THERMAL_VALUE=0 ;;
+  esac
+fi
+
+# Write metrics
+cat > "$TEMP_FILE" << EOF
+# HELP macos_cpu_power_watts CPU power consumption in watts
+# TYPE macos_cpu_power_watts gauge
+macos_cpu_power_watts $CPU_POWER
+# HELP macos_gpu_power_watts GPU power consumption in watts
+# TYPE macos_gpu_power_watts gauge
+macos_gpu_power_watts $GPU_POWER
+# HELP macos_ane_power_watts Apple Neural Engine power consumption in watts
+# TYPE macos_ane_power_watts gauge
+macos_ane_power_watts $ANE_POWER
+# HELP macos_combined_power_watts Combined CPU+GPU+ANE power consumption in watts
+# TYPE macos_combined_power_watts gauge
+macos_combined_power_watts $COMBINED_POWER
+# HELP macos_thermal_pressure Current thermal pressure level (0=Nominal, 1=Moderate, 2=Heavy, 3=Critical)
+# TYPE macos_thermal_pressure gauge
+macos_thermal_pressure $THERMAL_VALUE
+EOF
+
+# Atomic move
+mv "$TEMP_FILE" "$OUTPUT_FILE"
diff --git a/argocd/manifests/grafana-config/dashboards/configmap-macos.yaml b/argocd/manifests/grafana-config/dashboards/configmap-macos.yaml
index a9089f0..936e815 100644
--- a/argocd/manifests/grafana-config/dashboards/configmap-macos.yaml
+++ b/argocd/manifests/grafana-config/dashboards/configmap-macos.yaml
@@ -271,6 +271,194 @@ data:
         {
           "collapsed": false,
           "gridPos": { "h": 1, "w": 24, "x": 0, "y": 5 },
+          "id": 106,
+          "panels": [],
+          "title": "Power & Thermal",
+          "type": "row"
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "prometheus" },
+          "fieldConfig": {
+            "defaults": {
+              "color": { "mode": "thresholds" },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  { "color": "green", "value": null },
+                  { "color": "yellow", "value": 5 },
+                  { "color": "orange", "value": 10 },
+                  { "color": "red", "value": 15 }
+                ]
+              },
+              "unit": "watt",
+              "decimals": 1
+            },
+            "overrides": []
+          },
+          "gridPos": { "h": 4, "w": 4, "x": 0, "y": 6 },
+          "id": 60,
+          "options": {
+            "colorMode": "value",
+            "graphMode": "area",
+            "justifyMode": "auto",
+            "orientation": "auto",
+            "reduceOptions": {
+ "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "macos_combined_power_watts{instance=~\"$instance\"}", + "refId": "A" + } + ], + "title": "Total Power", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [ + { "options": { "0": { "color": "green", "index": 0, "text": "Nominal" } }, "type": "value" }, + { "options": { "1": { "color": "yellow", "index": 1, "text": "Moderate" } }, "type": "value" }, + { "options": { "2": { "color": "orange", "index": 2, "text": "Heavy" } }, "type": "value" }, + { "options": { "3": { "color": "red", "index": 3, "text": "Critical" } }, "type": "value" } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "orange", "value": 2 }, + { "color": "red", "value": 3 } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 4, "x": 4, "y": 6 }, + "id": 61, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "macos_thermal_pressure{instance=~\"$instance\"}", + "refId": "A" + } + ], + "title": "Thermal Pressure", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": 
"opacity", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "normal" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [{ "color": "green", "value": null }] + }, + "unit": "watt" + }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "CPU" }, + "properties": [ + { "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } } + ] + }, + { + "matcher": { "id": "byName", "options": "GPU" }, + "properties": [ + { "id": "color", "value": { "fixedColor": "blue", "mode": "fixed" } } + ] + }, + { + "matcher": { "id": "byName", "options": "ANE" }, + "properties": [ + { "id": "color", "value": { "fixedColor": "purple", "mode": "fixed" } } + ] + } + ] + }, + "gridPos": { "h": 8, "w": 16, "x": 8, "y": 6 }, + "id": 62, + "options": { + "legend": { + "calcs": ["mean", "max", "lastNotNull"], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "macos_cpu_power_watts{instance=~\"$instance\"}", + "legendFormat": "CPU", + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "macos_gpu_power_watts{instance=~\"$instance\"}", + "legendFormat": "GPU", + "refId": "B" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "macos_ane_power_watts{instance=~\"$instance\"}", + "legendFormat": "ANE", + "refId": "C" + } + ], + "title": "Power Consumption", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 14 }, "id": 101, "panels": [], "title": "CPU", @@ -350,7 +538,7 @@ data: } ] }, - 
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 6 }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 15 }, "id": 10, "options": { "legend": { @@ -435,7 +623,7 @@ data: } ] }, - "gridPos": { "h": 8, "w": 12, "x": 12, "y": 6 }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 15 }, "id": 11, "options": { "legend": { @@ -471,7 +659,7 @@ data: }, { "collapsed": false, - "gridPos": { "h": 1, "w": 24, "x": 0, "y": 14 }, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 23 }, "id": 102, "panels": [], "title": "Memory (macOS)", @@ -559,7 +747,7 @@ data: } ] }, - "gridPos": { "h": 8, "w": 12, "x": 0, "y": 15 }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 24 }, "id": 20, "options": { "legend": { @@ -626,7 +814,7 @@ data: }, "overrides": [] }, - "gridPos": { "h": 8, "w": 4, "x": 12, "y": 15 }, + "gridPos": { "h": 8, "w": 4, "x": 12, "y": 24 }, "id": 21, "options": { "orientation": "auto", @@ -702,7 +890,7 @@ data: } ] }, - "gridPos": { "h": 8, "w": 8, "x": 16, "y": 15 }, + "gridPos": { "h": 8, "w": 8, "x": 16, "y": 24 }, "id": 22, "options": { "legend": { @@ -732,7 +920,7 @@ data: }, { "collapsed": false, - "gridPos": { "h": 1, "w": 24, "x": 0, "y": 23 }, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 32 }, "id": 103, "panels": [], "title": "Disk", @@ -792,7 +980,7 @@ data: } ] }, - "gridPos": { "h": 8, "w": 12, "x": 0, "y": 24 }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 33 }, "id": 30, "options": { "legend": { @@ -874,7 +1062,7 @@ data: } ] }, - "gridPos": { "h": 8, "w": 12, "x": 12, "y": 24 }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 33 }, "id": 31, "options": { "legend": { @@ -904,7 +1092,7 @@ data: }, { "collapsed": false, - "gridPos": { "h": 1, "w": 24, "x": 0, "y": 32 }, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 41 }, "id": 104, "panels": [], "title": "Filesystem", @@ -930,7 +1118,7 @@ data: }, "overrides": [] }, - "gridPos": { "h": 6, "w": 12, "x": 0, "y": 33 }, + "gridPos": { "h": 6, "w": 12, "x": 0, "y": 42 }, "id": 40, "options": { "displayMode": "gradient", @@ -991,7 
+1179,7 @@ data: }, "overrides": [] }, - "gridPos": { "h": 6, "w": 12, "x": 12, "y": 33 }, + "gridPos": { "h": 6, "w": 12, "x": 12, "y": 42 }, "id": 41, "options": { "legend": { @@ -1015,7 +1203,7 @@ data: }, { "collapsed": false, - "gridPos": { "h": 1, "w": 24, "x": 0, "y": 39 }, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 48 }, "id": 105, "panels": [], "title": "Network", @@ -1075,7 +1263,7 @@ data: } ] }, - "gridPos": { "h": 8, "w": 12, "x": 0, "y": 40 }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 49 }, "id": 50, "options": { "legend": { @@ -1157,7 +1345,7 @@ data: } ] }, - "gridPos": { "h": 8, "w": 12, "x": 12, "y": 40 }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 49 }, "id": 51, "options": { "legend": { @@ -1220,7 +1408,7 @@ data: }, "overrides": [] }, - "gridPos": { "h": 8, "w": 24, "x": 0, "y": 48 }, + "gridPos": { "h": 8, "w": 24, "x": 0, "y": 57 }, "id": 52, "options": { "legend": {