Operations and observability for sifaka NAS (#135)

## Summary
- Add `smartctl_exporter` Docker container to sifaka for SMART disk health monitoring
- Formalize existing `node_exporter` container under Ansible management
- Route both exporters through Caddy L4 TCP proxy (`nas.ops.eblu.me:9100`, `nas.ops.eblu.me:9633`), replacing the hardcoded LAN IP in Prometheus
- Create "Sifaka Disk Health" Grafana dashboard (health status, temperature, wear indicators, lifetime)
- Introduce `ansible/playbooks/sifaka.yml` and `mise run provision-sifaka` — first Ansible playbook for the NAS
- Define shared exporter port variables in `group_vars/all.yml` to avoid duplicating them between the Caddy and sifaka roles

## Prerequisites before deploy
- [ ] Enable SSH on sifaka (DSM Control Panel > Terminal & SNMP)
- [ ] Verify `ssh eblume@sifaka 'docker ps'` works
- [ ] Run `mise run provision-sifaka` to deploy containers
- [ ] Run `mise run provision-indri -- --tags caddy` to add L4 routes
- [ ] `argocd app sync prometheus` + `argocd app sync grafana-config`

## Test plan
- [ ] Verify smartctl_exporter metrics: `curl http://nas.ops.eblu.me:9633/metrics`
- [ ] Verify Prometheus targets page shows both sifaka jobs as UP
- [ ] Verify Grafana "Sifaka Disk Health" dashboard loads with data

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Reviewed-on: https://forge.ops.eblu.me/eblume/blumeops/pulls/135
This commit is contained in:
Erich Blume 2026-02-09 17:44:05 -08:00
commit 85e36cd807
15 changed files with 538 additions and 9 deletions

View file

@ -1,2 +0,0 @@
---
# Banner text marking content as Ansible-managed and naming the source repo.
# NOTE(review): presumably rendered into templated files — confirm usage.
ansible_managed: >-
  Managed by ansible - do not edit.
  Source: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git

View file

@ -0,0 +1,6 @@
---
# Banner text marking content as Ansible-managed and naming the source repo.
ansible_managed: >-
  Managed by ansible - do not edit.
  Source: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git

# Ports of the Prometheus exporters on the sifaka NAS.
# Declared once here because both the caddy role (on indri) and the
# sifaka_exporters role consume them.
sifaka_node_exporter_port: 9100
sifaka_smartctl_exporter_port: 9633

View file

@ -0,0 +1,3 @@
---
# Connection variables: the Python interpreter Ansible should use on the
# target, and the account it logs in as.
ansible_python_interpreter: /usr/bin/python3
ansible_user: eblume

View file

@ -0,0 +1,7 @@
---
# Single play: apply the sifaka_exporters role to the `nas` hosts.
# The role carries a tag of the same name so it can be selected with --tags.
- name: Configure sifaka
  hosts: nas
  roles:
    - role: sifaka_exporters
      tags:
        - sifaka_exporters

View file

@ -84,3 +84,7 @@ caddy_tcp_services:
backend: "localhost:2200" # Forgejo SSH
- port: 5432
backend: "pg.tail8d86e.ts.net:5432" # PostgreSQL
- port: "{{ sifaka_node_exporter_port }}"
backend: "sifaka:{{ sifaka_node_exporter_port }}" # Sifaka node_exporter
- port: "{{ sifaka_smartctl_exporter_port }}"
backend: "sifaka:{{ sifaka_smartctl_exporter_port }}" # Sifaka smartctl_exporter

View file

@ -0,0 +1,15 @@
---
# Defaults for the sifaka_exporters role: docker binary location, exporter
# images, container names and the disks smartctl_exporter should watch.
# Exporter ports are NOT set here; they live in group_vars/all.yml because
# the caddy role needs the same values.

# Docker CLI path (inside the ContainerManager package directory)
sifaka_exporters_docker: /volume1/@appstore/ContainerManager/usr/bin/docker

# node_exporter image and container name
sifaka_exporters_node_exporter_image: 'prom/node-exporter:latest'
sifaka_exporters_node_exporter_name: 'prom-node-exporter-1'

# smartctl_exporter image and container name
sifaka_exporters_smartctl_exporter_image: 'prometheuscommunity/smartctl-exporter:latest'
sifaka_exporters_smartctl_exporter_name: 'smartctl-exporter'

# Disks are listed explicitly: Synology exposes them as /dev/sata* rather
# than /dev/sd*, and smartctl cannot auto-detect those names.
sifaka_exporters_smartctl_devices:
  - /dev/sata1
  - /dev/sata2
  - /dev/sata3
  - /dev/sata4

View file

@ -0,0 +1,12 @@
---
# Handlers that restart an exporter container through the docker CLI.
# `docker restart` always acts, so each handler reports "changed".

- name: Restart node_exporter
  listen: Restart node_exporter
  become: true
  ansible.builtin.command:
    cmd: >-
      {{ sifaka_exporters_docker }}
      restart
      {{ sifaka_exporters_node_exporter_name }}
  changed_when: true

- name: Restart smartctl_exporter
  listen: Restart smartctl_exporter
  become: true
  ansible.builtin.command:
    cmd: >-
      {{ sifaka_exporters_docker }}
      restart
      {{ sifaka_exporters_smartctl_exporter_name }}
  changed_when: true

View file

@ -0,0 +1,91 @@
---
# Manage Prometheus exporter containers on sifaka NAS
# Uses command module to avoid requiring docker Python SDK on Synology
# Requires passwordless sudo for docker — see docs/reference/storage/sifaka.md

# --- node_exporter ---
# Flow: pull the image, resolve the tag to a local image ID, read the image
# ID the existing container (if any) was created from, then recreate the
# container only when it is missing or its image ID differs.
#
# IDs are compared instead of tag strings because the configured tag may be
# mutable (e.g. ":latest"): the tag string recorded on the container never
# changes, so a string comparison would never roll out a newly pulled image.

- name: Pull node_exporter image
  ansible.builtin.command: "{{ sifaka_exporters_docker }} pull {{ sifaka_exporters_node_exporter_image }}"
  become: true
  register: sifaka_exporters_node_pull
  # docker prints this phrase only when it actually fetched new layers
  changed_when: "'Downloaded newer image' in sifaka_exporters_node_pull.stdout"

- name: Resolve node_exporter image ID
  ansible.builtin.command: "{{ sifaka_exporters_docker }} image inspect {{ sifaka_exporters_node_exporter_image }} --format {% raw %}'{{.Id}}'{% endraw %}"
  become: true
  register: sifaka_exporters_node_image_id
  changed_when: false

- name: Check if node_exporter container exists
  # stdout is the ID of the image the container was created from; a non-zero
  # rc means the container does not exist, which is not an error here.
  ansible.builtin.command: "{{ sifaka_exporters_docker }} inspect {{ sifaka_exporters_node_exporter_name }} --format {% raw %}'{{.Image}}'{% endraw %}"
  become: true
  register: sifaka_exporters_node_inspect
  changed_when: false
  failed_when: false

- name: Remove node_exporter container if image changed
  ansible.builtin.command: "{{ sifaka_exporters_docker }} rm -f {{ sifaka_exporters_node_exporter_name }}"
  become: true
  when:
    - sifaka_exporters_node_inspect.rc == 0
    - sifaka_exporters_node_inspect.stdout != sifaka_exporters_node_image_id.stdout
  changed_when: true

- name: Start node_exporter container
  # Runs detached on the host network with an always-restart policy.
  ansible.builtin.command:
    argv:
      - "{{ sifaka_exporters_docker }}"
      - run
      - -d
      - "--name={{ sifaka_exporters_node_exporter_name }}"
      - --restart=always
      - --net=host
      - "{{ sifaka_exporters_node_exporter_image }}"
  become: true
  register: sifaka_exporters_node_start
  # Same predicate as the removal above, plus the "container missing" case.
  when: >-
    sifaka_exporters_node_inspect.rc != 0 or
    sifaka_exporters_node_inspect.stdout != sifaka_exporters_node_image_id.stdout
  changed_when: sifaka_exporters_node_start.rc == 0
# --- smartctl_exporter ---
# Flow: pull the image, resolve the tag to a local image ID, read the image
# ID the existing container (if any) was created from, then recreate the
# container only when it is missing or its image ID differs.
#
# IDs are compared instead of tag strings because the configured tag may be
# mutable (e.g. ":latest"): the tag string recorded on the container never
# changes, so a string comparison would never roll out a newly pulled image.

- name: Pull smartctl_exporter image
  ansible.builtin.command: "{{ sifaka_exporters_docker }} pull {{ sifaka_exporters_smartctl_exporter_image }}"
  become: true
  register: sifaka_exporters_smartctl_pull
  # docker prints this phrase only when it actually fetched new layers
  changed_when: "'Downloaded newer image' in sifaka_exporters_smartctl_pull.stdout"

- name: Resolve smartctl_exporter image ID
  ansible.builtin.command: "{{ sifaka_exporters_docker }} image inspect {{ sifaka_exporters_smartctl_exporter_image }} --format {% raw %}'{{.Id}}'{% endraw %}"
  become: true
  register: sifaka_exporters_smartctl_image_id
  changed_when: false

- name: Check if smartctl_exporter container exists
  # stdout is the ID of the image the container was created from; a non-zero
  # rc means the container does not exist, which is not an error here.
  ansible.builtin.command: "{{ sifaka_exporters_docker }} inspect {{ sifaka_exporters_smartctl_exporter_name }} --format {% raw %}'{{.Image}}'{% endraw %}"
  become: true
  register: sifaka_exporters_smartctl_inspect
  changed_when: false
  failed_when: false

- name: Remove smartctl_exporter container if image changed
  ansible.builtin.command: "{{ sifaka_exporters_docker }} rm -f {{ sifaka_exporters_smartctl_exporter_name }}"
  become: true
  when:
    - sifaka_exporters_smartctl_inspect.rc == 0
    - sifaka_exporters_smartctl_inspect.stdout != sifaka_exporters_smartctl_image_id.stdout
  changed_when: true

- name: Build smartctl_exporter device arguments
  # Turns each configured device path into a "--smartctl.device=<path>" flag.
  ansible.builtin.set_fact:
    sifaka_exporters_smartctl_device_args: >-
      {{ sifaka_exporters_smartctl_devices | map('regex_replace', '^(.*)$', '--smartctl.device=\1') | list }}

- name: Start smartctl_exporter container
  # Runs detached, privileged and as root, publishing the exporter port; the
  # per-device flags are appended after the image name as exporter arguments.
  # NOTE(review): the -p mapping uses the configured port on both sides, so it
  # presumably relies on the exporter listening on that same port inside the
  # container — confirm before changing sifaka_smartctl_exporter_port.
  ansible.builtin.command:
    argv: >-
      {{ [
        sifaka_exporters_docker, 'run', '-d',
        '--name=' + sifaka_exporters_smartctl_exporter_name,
        '--restart=always',
        '--privileged',
        '--user=root',
        '-p', sifaka_smartctl_exporter_port | string + ':' + sifaka_smartctl_exporter_port | string,
        sifaka_exporters_smartctl_exporter_image
      ] + sifaka_exporters_smartctl_device_args }}
  become: true
  register: sifaka_exporters_smartctl_start
  # Same predicate as the removal above, plus the "container missing" case.
  when: >-
    sifaka_exporters_smartctl_inspect.rc != 0 or
    sifaka_exporters_smartctl_inspect.stdout != sifaka_exporters_smartctl_image_id.stdout
  changed_when: sifaka_exporters_smartctl_start.rc == 0