diff --git a/ansible/group_vars/all.yml b/ansible/group_vars/all.yml
deleted file mode 100644
index a9f303d..0000000
--- a/ansible/group_vars/all.yml
+++ /dev/null
@@ -1,2 +0,0 @@
----
-ansible_managed: "Managed by ansible - do not edit. Source: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git"
diff --git a/ansible/inventory/group_vars/all.yml b/ansible/inventory/group_vars/all.yml
new file mode 100644
index 0000000..342a493
--- /dev/null
+++ b/ansible/inventory/group_vars/all.yml
@@ -0,0 +1,6 @@
+---
+ansible_managed: "Managed by ansible - do not edit. Source: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git"
+
+# Sifaka NAS exporter ports — shared by caddy (indri) and sifaka_exporters roles
+sifaka_node_exporter_port: 9100
+sifaka_smartctl_exporter_port: 9633
diff --git a/ansible/inventory/host_vars/sifaka.yml b/ansible/inventory/host_vars/sifaka.yml
new file mode 100644
index 0000000..1afd4d8
--- /dev/null
+++ b/ansible/inventory/host_vars/sifaka.yml
@@ -0,0 +1,3 @@
+---
+ansible_user: eblume
+ansible_python_interpreter: /usr/bin/python3
diff --git a/ansible/playbooks/sifaka.yml b/ansible/playbooks/sifaka.yml
new file mode 100644
index 0000000..511a358
--- /dev/null
+++ b/ansible/playbooks/sifaka.yml
@@ -0,0 +1,7 @@
+---
+- name: Configure sifaka
+  hosts: nas
+
+  roles:
+    - role: sifaka_exporters
+      tags: sifaka_exporters
diff --git a/ansible/roles/caddy/defaults/main.yml b/ansible/roles/caddy/defaults/main.yml
index c35ef76..5d88fbf 100644
--- a/ansible/roles/caddy/defaults/main.yml
+++ b/ansible/roles/caddy/defaults/main.yml
@@ -84,3 +84,7 @@ caddy_tcp_services:
     backend: "localhost:2200" # Forgejo SSH
   - port: 5432
     backend: "pg.tail8d86e.ts.net:5432" # PostgreSQL
+  - port: "{{ sifaka_node_exporter_port }}"
+    backend: "sifaka:{{ sifaka_node_exporter_port }}" # Sifaka node_exporter
+  - port: "{{ sifaka_smartctl_exporter_port }}"
+    backend: "sifaka:{{ sifaka_smartctl_exporter_port }}" # Sifaka smartctl_exporter
diff --git a/ansible/roles/sifaka_exporters/defaults/main.yml b/ansible/roles/sifaka_exporters/defaults/main.yml
new file mode 100644
index 0000000..a7acd4e
--- /dev/null
+++ b/ansible/roles/sifaka_exporters/defaults/main.yml
@@ -0,0 +1,15 @@
+---
+# Docker images for Prometheus exporters on sifaka NAS
+# Ports are defined in inventory/group_vars/all.yml (shared with caddy role)
+sifaka_exporters_docker: /volume1/@appstore/ContainerManager/usr/bin/docker
+sifaka_exporters_node_exporter_image: "prom/node-exporter:latest"
+sifaka_exporters_node_exporter_name: "prom-node-exporter-1"
+sifaka_exporters_smartctl_exporter_image: "prometheuscommunity/smartctl-exporter:latest"
+sifaka_exporters_smartctl_exporter_name: "smartctl-exporter"
+
+# Synology uses /dev/sata* instead of /dev/sd* — smartctl can't auto-detect them
+sifaka_exporters_smartctl_devices:
+  - /dev/sata1
+  - /dev/sata2
+  - /dev/sata3
+  - /dev/sata4
diff --git a/ansible/roles/sifaka_exporters/handlers/main.yml b/ansible/roles/sifaka_exporters/handlers/main.yml
new file mode 100644
index 0000000..f4c6355
--- /dev/null
+++ b/ansible/roles/sifaka_exporters/handlers/main.yml
@@ -0,0 +1,13 @@
+---
+# NOTE(review): nothing in this role's tasks/main.yml notifies these handlers.
+- name: Restart node_exporter
+  ansible.builtin.command: "{{ sifaka_exporters_docker }} restart {{ sifaka_exporters_node_exporter_name }}"
+  become: true
+  listen: Restart node_exporter
+  changed_when: true
+
+- name: Restart smartctl_exporter
+  ansible.builtin.command: "{{ sifaka_exporters_docker }} restart {{ sifaka_exporters_smartctl_exporter_name }}"
+  become: true
+  listen: Restart smartctl_exporter
+  changed_when: true
diff --git a/ansible/roles/sifaka_exporters/tasks/main.yml b/ansible/roles/sifaka_exporters/tasks/main.yml
new file mode 100644
index 0000000..5d3a77c
--- /dev/null
+++ b/ansible/roles/sifaka_exporters/tasks/main.yml
@@ -0,0 +1,97 @@
+---
+# Manage Prometheus exporter containers on sifaka NAS
+# Uses command module to avoid requiring docker Python SDK on Synology
+# Requires passwordless sudo for docker — see docs/reference/storage/sifaka.md
+#
+# Each container is (re)created when it is missing, when it was created from a
+# different image reference, or when `docker pull` just fetched a newer image.
+# The last case matters for moving tags such as :latest, where the image
+# reference stored on the container never changes.
+
+# --- node_exporter ---
+
+- name: Pull node_exporter image
+  ansible.builtin.command: "{{ sifaka_exporters_docker }} pull {{ sifaka_exporters_node_exporter_image }}"
+  become: true
+  register: sifaka_exporters_node_pull
+  changed_when: "'Downloaded newer image' in sifaka_exporters_node_pull.stdout"
+
+- name: Check if node_exporter container exists
+  ansible.builtin.command: "{{ sifaka_exporters_docker }} inspect {{ sifaka_exporters_node_exporter_name }} --format {% raw %}'{{.Config.Image}}'{% endraw %}"
+  become: true
+  register: sifaka_exporters_node_inspect
+  changed_when: false
+  failed_when: false
+
+- name: Remove node_exporter container if image changed
+  ansible.builtin.command: "{{ sifaka_exporters_docker }} rm -f {{ sifaka_exporters_node_exporter_name }}"
+  become: true
+  when:
+    - sifaka_exporters_node_inspect.rc == 0
+    - sifaka_exporters_node_pull is changed or sifaka_exporters_node_inspect.stdout != sifaka_exporters_node_exporter_image
+  changed_when: true
+
+- name: Start node_exporter container
+  ansible.builtin.command:
+    argv:
+      - "{{ sifaka_exporters_docker }}"
+      - run
+      - -d
+      - "--name={{ sifaka_exporters_node_exporter_name }}"
+      - --restart=always
+      - --net=host
+      - "{{ sifaka_exporters_node_exporter_image }}"
+  become: true
+  register: sifaka_exporters_node_start
+  when: >
+    sifaka_exporters_node_inspect.rc != 0 or sifaka_exporters_node_pull is changed or
+    sifaka_exporters_node_inspect.stdout != sifaka_exporters_node_exporter_image
+  changed_when: sifaka_exporters_node_start.rc == 0
+
+# --- smartctl_exporter ---
+
+- name: Pull smartctl_exporter image
+  ansible.builtin.command: "{{ sifaka_exporters_docker }} pull {{ sifaka_exporters_smartctl_exporter_image }}"
+  become: true
+  register: sifaka_exporters_smartctl_pull
+  changed_when: "'Downloaded newer image' in sifaka_exporters_smartctl_pull.stdout"
+
+- name: Check if smartctl_exporter container exists
+  ansible.builtin.command: "{{ sifaka_exporters_docker }} inspect {{ sifaka_exporters_smartctl_exporter_name }} --format {% raw %}'{{.Config.Image}}'{% endraw %}"
+  become: true
+  register: sifaka_exporters_smartctl_inspect
+  changed_when: false
+  failed_when: false
+
+- name: Remove smartctl_exporter container if image changed
+  ansible.builtin.command: "{{ sifaka_exporters_docker }} rm -f {{ sifaka_exporters_smartctl_exporter_name }}"
+  become: true
+  when:
+    - sifaka_exporters_smartctl_inspect.rc == 0
+    - sifaka_exporters_smartctl_pull is changed or sifaka_exporters_smartctl_inspect.stdout != sifaka_exporters_smartctl_exporter_image
+  changed_when: true
+
+- name: Build smartctl_exporter device arguments
+  ansible.builtin.set_fact:
+    sifaka_exporters_smartctl_device_args: >-
+      {{ sifaka_exporters_smartctl_devices | map('regex_replace', '^(.*)$', '--smartctl.device=\1') | list }}
+
+# argv is built as a Jinja list so the per-device flags can be appended to it.
+- name: Start smartctl_exporter container
+  ansible.builtin.command:
+    argv: >-
+      {{ [
+        sifaka_exporters_docker, 'run', '-d',
+        '--name=' + sifaka_exporters_smartctl_exporter_name,
+        '--restart=always',
+        '--privileged',
+        '--user=root',
+        '-p', sifaka_smartctl_exporter_port | string + ':' + sifaka_smartctl_exporter_port | string,
+        sifaka_exporters_smartctl_exporter_image
+      ] + sifaka_exporters_smartctl_device_args }}
+  become: true
+  register: sifaka_exporters_smartctl_start
+  when: >
+    sifaka_exporters_smartctl_inspect.rc != 0 or sifaka_exporters_smartctl_pull is changed or
+    sifaka_exporters_smartctl_inspect.stdout != sifaka_exporters_smartctl_exporter_image
+  changed_when: sifaka_exporters_smartctl_start.rc == 0
diff --git a/argocd/manifests/grafana-config/dashboards/configmap-sifaka-disks.yaml b/argocd/manifests/grafana-config/dashboards/configmap-sifaka-disks.yaml
new file mode 100644
index 0000000..a92ec23
--- /dev/null
+++ b/argocd/manifests/grafana-config/dashboards/configmap-sifaka-disks.yaml
@@ -0,0 +1,314 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: grafana-dashboard-sifaka-disks
+  namespace: monitoring
+  labels:
+    grafana_dashboard: "1"
+data:
+  sifaka-disks.json: |
+    {
+      "annotations": { "list": [] },
+      "editable": true,
+      "fiscalYearStartMonth": 0,
+      "graphTooltip": 1,
+      
"id": null, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, + "id": 100, + "panels": [], + "title": "Health Overview", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [ + { "options": { "0": { "color": "red", "text": "FAILING" }, "1": { "color": "green", "text": "HEALTHY" } }, "type": "value" } + ], + "thresholds": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] } + }, + "overrides": [] + }, + "gridPos": { "h": 6, "w": 24, "x": 0, "y": 1 }, + "id": 1, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "value_and_name" + }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "smartctl_device_smart_status{job=\"smartctl-sifaka\"}", "legendFormat": "{{device}} ({{model_name}})", "refId": "A" } + ], + "title": "SMART Health Status", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 7 }, + "id": 101, + "panels": [], + "title": "Temperature", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 40 }, { "color": "red", "value": 50 }] }, + "unit": "celsius" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 24, "x": 0, "y": 8 }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "value_and_name" + }, 
+ "targets": [ + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "smartctl_device_temperature{job=\"smartctl-sifaka\"}", "legendFormat": "{{device}}", "refId": "A" } + ], + "title": "Current Temperature", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisLabel": "", + "axisPlacement": "auto", + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "line+area" } + }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "transparent", "value": null }, { "color": "red", "value": 50 }] }, + "unit": "celsius" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 24, "x": 0, "y": 12 }, + "id": 3, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "right", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "smartctl_device_temperature{job=\"smartctl-sifaka\"}", "legendFormat": "{{device}}", "refId": "A" } + ], + "title": "Temperature Over Time", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 20 }, + "id": 102, + "panels": [], + "title": "Wear Indicators", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 1 }, { "color": 
"red", "value": 10 }] } + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 24, "x": 0, "y": 21 }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "value_and_name" + }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "smartctl_device_attribute{job=\"smartctl-sifaka\", attribute_name=\"Reallocated_Sector_Ct\", attribute_value_type=\"raw\"}", "legendFormat": "{{device}}", "refId": "A" } + ], + "title": "Reallocated Sectors", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 1 }, { "color": "red", "value": 10 }] } + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 24, "x": 0, "y": 25 }, + "id": 5, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "value_and_name" + }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "smartctl_device_attribute{job=\"smartctl-sifaka\", attribute_name=\"Current_Pending_Sector\", attribute_value_type=\"raw\"}", "legendFormat": "{{device}}", "refId": "A" } + ], + "title": "Pending Sectors", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 1 }, { "color": "red", "value": 100 }] } + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 24, "x": 0, "y": 
29 }, + "id": 6, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "value_and_name" + }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "smartctl_device_attribute{job=\"smartctl-sifaka\", attribute_name=\"UDMA_CRC_Error_Count\", attribute_value_type=\"raw\"}", "legendFormat": "{{device}}", "refId": "A" } + ], + "title": "UDMA CRC Errors", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 1 }, { "color": "red", "value": 10 }] } + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 24, "x": 0, "y": 33 }, + "id": 7, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "value_and_name" + }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "smartctl_device_attribute{job=\"smartctl-sifaka\", attribute_name=\"Offline_Uncorrectable\", attribute_value_type=\"raw\"}", "legendFormat": "{{device}}", "refId": "A" } + ], + "title": "Offline Uncorrectable Sectors", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 37 }, + "id": 103, + "panels": [], + "title": "Lifetime", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "h" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 24, "x": 
0, "y": 38 }, + "id": 8, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "value_and_name" + }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "smartctl_device_power_on_seconds{job=\"smartctl-sifaka\"} / 3600", "legendFormat": "{{device}}", "refId": "A" } + ], + "title": "Power-On Hours", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] } + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 24, "x": 0, "y": 42 }, + "id": 9, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "value_and_name" + }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "smartctl_device_power_cycle_count{job=\"smartctl-sifaka\"}", "legendFormat": "{{device}}", "refId": "A" } + ], + "title": "Power Cycle Count", + "type": "stat" + } + ], + "refresh": "1m", + "schemaVersion": 38, + "tags": ["sifaka", "storage", "smart"], + "templating": { "list": [] }, + "time": { "from": "now-24h", "to": "now" }, + "timepicker": {}, + "timezone": "browser", + "title": "Sifaka Disk Health", + "uid": "sifaka-disk-health", + "version": 1, + "weekStart": "" + } diff --git a/argocd/manifests/grafana-config/kustomization.yaml b/argocd/manifests/grafana-config/kustomization.yaml index 00dc5c6..a0a2356 100644 --- a/argocd/manifests/grafana-config/kustomization.yaml +++ b/argocd/manifests/grafana-config/kustomization.yaml @@ -19,6 +19,7 @@ resources: - dashboards/configmap-zot.yaml - 
dashboards/configmap-docs-apm.yaml - dashboards/configmap-flyio.yaml + - dashboards/configmap-sifaka-disks.yaml # TeslaMate dashboards - dashboards/configmap-teslamate-overview.yaml - dashboards/configmap-teslamate-charges.yaml diff --git a/argocd/manifests/prometheus/configmap.yaml b/argocd/manifests/prometheus/configmap.yaml index cc43999..0881d2e 100644 --- a/argocd/manifests/prometheus/configmap.yaml +++ b/argocd/manifests/prometheus/configmap.yaml @@ -13,12 +13,15 @@ data: # K8s services are scraped directly scrape_configs: - # Sifaka NAS node-exporter (via LAN - Docker NATs through indri) - # Using LAN IP since k8s pods can reach LAN via Docker NAT (same as NFS mounts) - # If IP changes, fallback: create Tailscale egress in tailscale-operator/egress-sifaka.yaml + # Sifaka NAS exporters (via Caddy L4 TCP proxy on indri) - job_name: "node-exporter-sifaka" static_configs: - - targets: ["192.168.1.203:9100"] + - targets: ["nas.ops.eblu.me:9100"] + + - job_name: "smartctl-sifaka" + scrape_interval: 60s + static_configs: + - targets: ["nas.ops.eblu.me:9633"] # CNPG PostgreSQL metrics (k8s internal) - job_name: "cnpg-postgres" diff --git a/docs/changelog.d/feature-sifaka-ops-observability.feature.md b/docs/changelog.d/feature-sifaka-ops-observability.feature.md new file mode 100644 index 0000000..156e253 --- /dev/null +++ b/docs/changelog.d/feature-sifaka-ops-observability.feature.md @@ -0,0 +1 @@ +Add SMART disk health monitoring for sifaka NAS with smartctl_exporter, Grafana dashboard, Ansible playbook, and Caddy L4 routing via ops.eblu.me. diff --git a/docs/reference/infrastructure/routing.md b/docs/reference/infrastructure/routing.md index cf8e115..9270909 100644 --- a/docs/reference/infrastructure/routing.md +++ b/docs/reference/infrastructure/routing.md @@ -62,6 +62,8 @@ DNS CNAMEs point to `blumeops-proxy.fly.dev`. 
TLS via Fly.io-managed Let's Encry | 443 | Caddy | HTTPS | 0.0.0.0 | Reverse proxy | | 2222 | Caddy L4 | TCP | 0.0.0.0 | SSH proxy to Forgejo | | 5432 | Caddy L4 | TCP | 0.0.0.0 | PostgreSQL proxy | +| 9100 | Caddy L4 | TCP | 0.0.0.0 | Sifaka node_exporter proxy | +| 9633 | Caddy L4 | TCP | 0.0.0.0 | Sifaka smartctl_exporter proxy | | 2200 | Forgejo SSH | TCP | localhost | Built-in SSH server | | 3001 | Forgejo | HTTP | localhost | Web UI | | 5050 | Zot | HTTP | localhost | Registry API | diff --git a/docs/reference/storage/sifaka.md b/docs/reference/storage/sifaka.md index caad5c5..cd751cd 100644 --- a/docs/reference/storage/sifaka.md +++ b/docs/reference/storage/sifaka.md @@ -13,8 +13,8 @@ Synology NAS providing network storage and backup target. | Property | Value | |----------|-------| | **Dashboard** | https://nas.ops.eblu.me | -| **Model** | Synology | -| **Storage** | 10.9TB RAID 5 | +| **Model** | Synology DS423+ (DSM 7) | +| **Storage** | 10.9TB RAID 5 (4x Seagate IronWolf 4TB, ST4000VN006) | | **Role** | Backup target, media storage | ## Network Shares @@ -37,7 +37,70 @@ Synology NAS providing network storage and backup target. ## Monitoring -Node exporter running in Docker container, scraped by [[prometheus]] at `sifaka:9100`. +Prometheus exporters run as Docker containers, managed by Ansible (`mise run provision-sifaka`). + +| Exporter | Port | Purpose | +|----------|------|---------| +| node_exporter | 9100 | System metrics (CPU, memory, disk I/O) | +| smartctl_exporter | 9633 | SMART disk health data | + +Scraped by [[prometheus]] via Caddy L4 TCP proxy at `nas.ops.eblu.me:9100` and `nas.ops.eblu.me:9633`. Dashboard: [[grafana]] > Sifaka Disk Health. + +## First-Time Setup + +These steps were performed once to enable Ansible provisioning. They are documented here for reference if sifaka is ever replaced or reset. + +### 1. Enable SSH + +DSM Control Panel > Terminal & SNMP > Enable SSH service (port 22). + +### 2. 
SSH Key Authentication
+
+From a tailnet client with an existing SSH key:
+
+```bash
+ssh-copy-id eblume@sifaka # uses password auth initially
+```
+
+Synology requires strict permissions on the home directory. On sifaka:
+
+```bash
+chmod 755 ~ # DSM defaults to 777; SSH refuses keys otherwise
+chmod 700 ~/.ssh
+chmod 600 ~/.ssh/authorized_keys
+```
+
+Home directory path: `/var/services/homes/eblume`.
+
+### 3. Passwordless Sudo for Docker
+
+Ansible needs `become: true` for Docker commands. Create a sudoers drop-in:
+
+```bash
+sudo vi /etc/sudoers.d/docker-ansible
+```
+
+Contents:
+
+```
+eblume ALL=(ALL) NOPASSWD: /volume1/@appstore/ContainerManager/usr/bin/docker
+```
+
+This limits passwordless sudo to the Docker binary. Note that unrestricted Docker access is itself root-equivalent (e.g. `--privileged` containers or host bind mounts), so protect this account's SSH key accordingly.
+
+### 4. Docker Path
+
+Synology installs Docker via Container Manager at a non-standard path:
+
+```
+/volume1/@appstore/ContainerManager/usr/bin/docker
+```
+
+This is configured in the `sifaka_exporters` role defaults.
+
+### 5. Synology Device Naming
+
+Synology uses `/dev/sata*` (e.g., `/dev/sata1` through `/dev/sata4`) instead of the standard `/dev/sd*` naming. The `smartctl_exporter` cannot auto-detect these devices, so they are passed explicitly via `--smartctl.device=` flags in the Ansible role.
 
 ## Tailscale
 
diff --git a/mise-tasks/provision-sifaka b/mise-tasks/provision-sifaka
new file mode 100755
index 0000000..8ef0631
--- /dev/null
+++ b/mise-tasks/provision-sifaka
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+#MISE description="Run ansible playbook to provision sifaka"
+
+set -euo pipefail
+
+export MISE_TASK_OUTPUT=interleave
+
+# NOTE(review): relative path — assumes the task starts in the repository root; confirm mise's task working directory.
+cd ansible
+# Any extra command-line arguments are forwarded to ansible-playbook unchanged.
+ansible-playbook playbooks/sifaka.yml "$@"