Add PostgreSQL transaction-ID (XID) age monitoring.

* alloy role: deploy postgres_queries.yaml (a custom exporter query that reports
  age(datfrozenxid) per database) and point prometheus.exporter.postgres at it.
  The exporter path is built from alloy_config_dir, the same variable the
  deploy task uses for its destination, so the two cannot drift apart.
* grafana role: add a gauge and a timeseries panel, both reading
  pg_database_xid_age_xid_age, to the PostgreSQL dashboard.

diff --git a/ansible/roles/alloy/tasks/main.yml b/ansible/roles/alloy/tasks/main.yml
index 23ac3c3..4fb6225 100644
--- a/ansible/roles/alloy/tasks/main.yml
+++ b/ansible/roles/alloy/tasks/main.yml
@@ -50,6 +50,14 @@
 
 # === Deploy configuration ===
 
+- name: Deploy PostgreSQL custom queries config
+  ansible.builtin.template:
+    src: postgres_queries.yaml.j2
+    dest: "{{ alloy_config_dir }}/postgres_queries.yaml"
+    mode: '0600'
+  notify: restart alloy
+  when: alloy_collect_postgres | default(false)
+
 - name: Deploy alloy configuration
   ansible.builtin.template:
     src: config.alloy.j2
diff --git a/ansible/roles/alloy/templates/config.alloy.j2 b/ansible/roles/alloy/templates/config.alloy.j2
index 08e3cdf..e0c1cad 100644
--- a/ansible/roles/alloy/templates/config.alloy.j2
+++ b/ansible/roles/alloy/templates/config.alloy.j2
@@ -41,6 +41,9 @@ prometheus.remote_write "prometheus" {
 // PostgreSQL exporter (read-only metrics via pg_monitor role)
 prometheus.exporter.postgres "postgresql" {
   data_source_names = ["postgresql://{{ alloy_postgres_user }}:{{ alloy_postgres_password | urlencode }}@{{ alloy_postgres_host }}:{{ alloy_postgres_port }}/{{ alloy_postgres_database }}?sslmode=disable"]
+
+  // Custom queries for vacuum and XID monitoring
+  custom_queries_config_path = "{{ alloy_config_dir }}/postgres_queries.yaml"
 }
 
 // Scrape PostgreSQL metrics
diff --git a/ansible/roles/alloy/templates/postgres_queries.yaml.j2 b/ansible/roles/alloy/templates/postgres_queries.yaml.j2
new file mode 100644
index 0000000..6714995
--- /dev/null
+++ b/ansible/roles/alloy/templates/postgres_queries.yaml.j2
@@ -0,0 +1,20 @@
+# {{ ansible_managed }}
+# Custom PostgreSQL queries for XID exhaustion monitoring
+
+pg_database_xid_age:
+  query: |
+    SELECT datname,
+           age(datfrozenxid) as xid_age,
+           current_setting('autovacuum_freeze_max_age')::bigint as freeze_max_age
+    FROM pg_database
+    WHERE datallowconn
+  metrics:
+    - datname:
+        usage: "LABEL"
+        description: "Database name"
+    - xid_age:
+        usage: "GAUGE"
+        description: "Age of oldest unfrozen transaction ID"
+    - freeze_max_age:
+        usage: "GAUGE"
+        description: "autovacuum_freeze_max_age setting"
diff --git a/ansible/roles/grafana/files/dashboards/postgresql.json b/ansible/roles/grafana/files/dashboards/postgresql.json
index 990a278..6935416 100644
--- a/ansible/roles/grafana/files/dashboards/postgresql.json
+++ b/ansible/roles/grafana/files/dashboards/postgresql.json
@@ -500,7 +500,132 @@
       ],
       "title": "Transactions Rate",
       "type": "timeseries"
-    }
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "max": 200000000,
+          "min": 0,
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              { "color": "green", "value": null },
+              { "color": "yellow", "value": 150000000 },
+              { "color": "red", "value": 180000000 }
+            ]
+          },
+          "unit": "short"
+        }
+      },
+      "gridPos": { "h": 6, "w": 12, "x": 0, "y": 20 },
+      "id": 9,
+      "options": {
+        "minVizHeight": 75,
+        "minVizWidth": 75,
+        "orientation": "horizontal",
+        "reduceOptions": {
+          "calcs": ["lastNotNull"],
+          "fields": "",
+          "values": false
+        },
+        "showThresholdLabels": false,
+        "showThresholdMarkers": true,
+        "sizing": "auto"
+      },
+      "targets": [
+        {
+          "expr": "pg_database_xid_age_xid_age",
+          "legendFormat": "{{datname}}",
+          "refId": "A"
+        }
+      ],
+      "title": "Transaction ID Age (XID Exhaustion Risk)",
+      "description": "Age of oldest unfrozen XID. Approaches 2 billion = wraparound danger. Yellow at 150M, Red at 180M (autovacuum_freeze_max_age default is 200M).",
+      "type": "gauge"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              { "color": "green", "value": null }
+            ]
+          },
+          "unit": "short"
+        }
+      },
+      "gridPos": { "h": 6, "w": 12, "x": 12, "y": 20 },
+      "id": 10,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "targets": [
+        {
+          "expr": "pg_database_xid_age_xid_age",
+          "legendFormat": "{{datname}}",
+          "refId": "A"
+        }
+      ],
+      "title": "XID Age Over Time",
+      "type": "timeseries"
+    }
   ],
   "schemaVersion": 39,
   "tags": ["postgresql", "database"],