Operations and observability for sifaka NAS #135
15 changed files with 538 additions and 9 deletions
|
|
@ -1,2 +0,0 @@
|
|||
---
|
||||
ansible_managed: "Managed by ansible - do not edit. Source: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git"
|
||||
6
ansible/inventory/group_vars/all.yml
Normal file
6
ansible/inventory/group_vars/all.yml
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
ansible_managed: "Managed by ansible - do not edit. Source: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git"
|
||||
|
||||
# Sifaka NAS exporter ports — shared by caddy (indri) and sifaka_exporters roles
|
||||
sifaka_node_exporter_port: 9100
|
||||
sifaka_smartctl_exporter_port: 9633
|
||||
3
ansible/inventory/host_vars/sifaka.yml
Normal file
3
ansible/inventory/host_vars/sifaka.yml
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
---
|
||||
ansible_user: eblume
|
||||
ansible_python_interpreter: /usr/bin/python3
|
||||
7
ansible/playbooks/sifaka.yml
Normal file
7
ansible/playbooks/sifaka.yml
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
---
# Play for the sifaka NAS: applies the sifaka_exporters role to the hosts
# matched by the `nas` inventory pattern.
- name: Configure sifaka
  hosts: nas

  roles:
    # The tag lets this role be selected on its own with --tags sifaka_exporters.
    - role: sifaka_exporters
      tags: sifaka_exporters
|
||||
|
|
@ -84,3 +84,7 @@ caddy_tcp_services:
|
|||
backend: "localhost:2200" # Forgejo SSH
|
||||
- port: 5432
|
||||
backend: "pg.tail8d86e.ts.net:5432" # PostgreSQL
|
||||
- port: "{{ sifaka_node_exporter_port }}"
|
||||
backend: "sifaka:{{ sifaka_node_exporter_port }}" # Sifaka node_exporter
|
||||
- port: "{{ sifaka_smartctl_exporter_port }}"
|
||||
backend: "sifaka:{{ sifaka_smartctl_exporter_port }}" # Sifaka smartctl_exporter
|
||||
|
|
|
|||
15
ansible/roles/sifaka_exporters/defaults/main.yml
Normal file
15
ansible/roles/sifaka_exporters/defaults/main.yml
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
---
|
||||
# Docker images for Prometheus exporters on sifaka NAS
# Ports are defined in group_vars/all.yml (shared with caddy role)

# Absolute path of the docker CLI; the role's command tasks and handlers
# invoke this binary directly.
sifaka_exporters_docker: /volume1/@appstore/ContainerManager/usr/bin/docker

# node_exporter: image reference to run and the container name to manage.
# NOTE(review): ":latest" is a mutable tag — consider pinning a version so
# runs are reproducible.
sifaka_exporters_node_exporter_image: "prom/node-exporter:latest"
sifaka_exporters_node_exporter_name: "prom-node-exporter-1"

# smartctl_exporter: image reference to run and the container name to manage.
# NOTE(review): same mutable-tag caveat as above.
sifaka_exporters_smartctl_exporter_image: "prometheuscommunity/smartctl-exporter:latest"
sifaka_exporters_smartctl_exporter_name: "smartctl-exporter"

# Synology uses /dev/sata* instead of /dev/sd* — smartctl can't auto-detect them
# Each entry is turned into one --smartctl.device=<path> exporter argument.
sifaka_exporters_smartctl_devices:
  - /dev/sata1
  - /dev/sata2
  - /dev/sata3
  - /dev/sata4
|
||||
12
ansible/roles/sifaka_exporters/handlers/main.yml
Normal file
12
ansible/roles/sifaka_exporters/handlers/main.yml
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
---
|
||||
# Bounce the node_exporter container in place through the docker CLI.
- name: Restart node_exporter
  listen: Restart node_exporter
  become: true
  ansible.builtin.command:
    cmd: "{{ sifaka_exporters_docker }} restart {{ sifaka_exporters_node_exporter_name }}"
  # A restart always has an effect, so it is always reported as a change.
  changed_when: true

# Bounce the smartctl_exporter container in place through the docker CLI.
- name: Restart smartctl_exporter
  listen: Restart smartctl_exporter
  become: true
  ansible.builtin.command:
    cmd: "{{ sifaka_exporters_docker }} restart {{ sifaka_exporters_smartctl_exporter_name }}"
  # A restart always has an effect, so it is always reported as a change.
  changed_when: true
|
||||
91
ansible/roles/sifaka_exporters/tasks/main.yml
Normal file
91
ansible/roles/sifaka_exporters/tasks/main.yml
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
---
|
||||
# Manage Prometheus exporter containers on sifaka NAS
|
||||
# Uses command module to avoid requiring docker Python SDK on Synology
|
||||
# Requires passwordless sudo for docker — see docs/reference/storage/sifaka.md
|
||||
|
||||
# --- node_exporter ---
|
||||
|
||||
# Pull on every run; reported as changed only when docker says it downloaded
# a newer image for the configured reference.
- name: Pull node_exporter image
  ansible.builtin.command: "{{ sifaka_exporters_docker }} pull {{ sifaka_exporters_node_exporter_image }}"
  become: true
  register: sifaka_exporters_node_pull
  changed_when: "'Downloaded newer image' in sifaka_exporters_node_pull.stdout"

# Read-only probe. A non-zero rc is treated as "container absent" (failures
# are deliberately not fatal); on success stdout holds the image reference
# the container was created from.
- name: Check if node_exporter container exists
  ansible.builtin.command: "{{ sifaka_exporters_docker }} inspect {{ sifaka_exporters_node_exporter_name }} --format {% raw %}'{{.Config.Image}}'{% endraw %}"
  become: true
  register: sifaka_exporters_node_inspect
  changed_when: false
  failed_when: false

# Recreate when the configured reference differs from the running one, or
# when the pull above fetched a newer image. The second case matters for
# mutable tags such as ":latest": the reference string stays identical, so
# comparing references alone would leave the old container running forever.
- name: Remove node_exporter container if image changed
  ansible.builtin.command: "{{ sifaka_exporters_docker }} rm -f {{ sifaka_exporters_node_exporter_name }}"
  become: true
  when:
    - sifaka_exporters_node_inspect.rc == 0
    - >-
      sifaka_exporters_node_inspect.stdout != sifaka_exporters_node_exporter_image
      or sifaka_exporters_node_pull is changed
  changed_when: true

# Start a container whenever none exists or the previous one was removed
# above (the condition mirrors the removal condition).
- name: Start node_exporter container
  ansible.builtin.command:
    argv:
      - "{{ sifaka_exporters_docker }}"
      - run
      - -d
      - "--name={{ sifaka_exporters_node_exporter_name }}"
      - --restart=always
      - --net=host
      - "{{ sifaka_exporters_node_exporter_image }}"
  become: true
  register: sifaka_exporters_node_start
  when: >-
    sifaka_exporters_node_inspect.rc != 0 or
    sifaka_exporters_node_inspect.stdout != sifaka_exporters_node_exporter_image or
    sifaka_exporters_node_pull is changed
  changed_when: sifaka_exporters_node_start.rc == 0
|
||||
|
||||
# --- smartctl_exporter ---
|
||||
|
||||
# Pull on every run; reported as changed only when docker says it downloaded
# a newer image for the configured reference.
- name: Pull smartctl_exporter image
  ansible.builtin.command: "{{ sifaka_exporters_docker }} pull {{ sifaka_exporters_smartctl_exporter_image }}"
  become: true
  register: sifaka_exporters_smartctl_pull
  changed_when: "'Downloaded newer image' in sifaka_exporters_smartctl_pull.stdout"

# Read-only probe. A non-zero rc is treated as "container absent" (failures
# are deliberately not fatal); on success stdout holds the image reference
# the container was created from.
- name: Check if smartctl_exporter container exists
  ansible.builtin.command: "{{ sifaka_exporters_docker }} inspect {{ sifaka_exporters_smartctl_exporter_name }} --format {% raw %}'{{.Config.Image}}'{% endraw %}"
  become: true
  register: sifaka_exporters_smartctl_inspect
  changed_when: false
  failed_when: false

# Recreate when the configured reference differs from the running one, or
# when the pull above fetched a newer image. The second case matters for
# mutable tags such as ":latest": the reference string stays identical, so
# comparing references alone would leave the old container running forever.
- name: Remove smartctl_exporter container if image changed
  ansible.builtin.command: "{{ sifaka_exporters_docker }} rm -f {{ sifaka_exporters_smartctl_exporter_name }}"
  become: true
  when:
    - sifaka_exporters_smartctl_inspect.rc == 0
    - >-
      sifaka_exporters_smartctl_inspect.stdout != sifaka_exporters_smartctl_exporter_image
      or sifaka_exporters_smartctl_pull is changed
  changed_when: true

# Turn each configured device path into one --smartctl.device=<path> flag.
- name: Build smartctl_exporter device arguments
  ansible.builtin.set_fact:
    sifaka_exporters_smartctl_device_args: >-
      {{ sifaka_exporters_smartctl_devices | map('regex_replace', '^(.*)$', '--smartctl.device=\1') | list }}

# Start a container whenever none exists or the previous one was removed
# above. The device flags are appended after the image name so they reach
# the exporter rather than docker.
# NOTE(review): the -p mapping uses the same port on both sides, which
# assumes the exporter listens on sifaka_smartctl_exporter_port inside the
# container — confirm before overriding the port.
- name: Start smartctl_exporter container
  ansible.builtin.command:
    argv: >-
      {{ [
        sifaka_exporters_docker, 'run', '-d',
        '--name=' + sifaka_exporters_smartctl_exporter_name,
        '--restart=always',
        '--privileged',
        '--user=root',
        '-p', sifaka_smartctl_exporter_port | string + ':' + sifaka_smartctl_exporter_port | string,
        sifaka_exporters_smartctl_exporter_image
      ] + sifaka_exporters_smartctl_device_args }}
  become: true
  register: sifaka_exporters_smartctl_start
  when: >-
    sifaka_exporters_smartctl_inspect.rc != 0 or
    sifaka_exporters_smartctl_inspect.stdout != sifaka_exporters_smartctl_exporter_image or
    sifaka_exporters_smartctl_pull is changed
  changed_when: sifaka_exporters_smartctl_start.rc == 0
|
||||
|
|
@ -0,0 +1,314 @@
|
|||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: grafana-dashboard-sifaka-disks
|
||||
namespace: monitoring
|
||||
labels:
|
||||
grafana_dashboard: "1"
|
||||
data:
|
||||
sifaka-disks.json: |
|
||||
{
|
||||
"annotations": { "list": [] },
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 1,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
|
||||
"id": 100,
|
||||
"panels": [],
|
||||
"title": "Health Overview",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "thresholds" },
|
||||
"mappings": [
|
||||
{ "options": { "0": { "color": "red", "text": "FAILING" }, "1": { "color": "green", "text": "HEALTHY" } }, "type": "value" }
|
||||
],
|
||||
"thresholds": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] }
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 6, "w": 24, "x": 0, "y": 1 },
|
||||
"id": 1,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "center",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
||||
"textMode": "value_and_name"
|
||||
},
|
||||
"targets": [
|
||||
{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "smartctl_device_smart_status{job=\"smartctl-sifaka\"} * on (instance, device) group_left (model_name) smartctl_device{job=\"smartctl-sifaka\"}", "legendFormat": "{{device}} ({{model_name}})", "refId": "A" }
|
||||
],
|
||||
"title": "SMART Health Status",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 7 },
|
||||
"id": 101,
|
||||
"panels": [],
|
||||
"title": "Temperature",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "thresholds" },
|
||||
"mappings": [],
|
||||
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 40 }, { "color": "red", "value": 50 }] },
|
||||
"unit": "celsius"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 4, "w": 24, "x": 0, "y": 8 },
|
||||
"id": 2,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "center",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
||||
"textMode": "value_and_name"
|
||||
},
|
||||
"targets": [
|
||||
{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "smartctl_device_temperature{job=\"smartctl-sifaka\", temperature_type=\"current\"}", "legendFormat": "{{device}}", "refId": "A" }
|
||||
],
|
||||
"title": "Current Temperature",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": { "group": "A", "mode": "none" },
|
||||
"thresholdsStyle": { "mode": "line+area" }
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": { "mode": "absolute", "steps": [{ "color": "transparent", "value": null }, { "color": "red", "value": 50 }] },
|
||||
"unit": "celsius"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 12 },
|
||||
"id": 3,
|
||||
"options": {
|
||||
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "right", "showLegend": true },
|
||||
"tooltip": { "mode": "multi", "sort": "desc" }
|
||||
},
|
||||
"targets": [
|
||||
{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "smartctl_device_temperature{job=\"smartctl-sifaka\", temperature_type=\"current\"}", "legendFormat": "{{device}}", "refId": "A" }
|
||||
],
|
||||
"title": "Temperature Over Time",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 20 },
|
||||
"id": 102,
|
||||
"panels": [],
|
||||
"title": "Wear Indicators",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "thresholds" },
|
||||
"mappings": [],
|
||||
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 1 }, { "color": "red", "value": 10 }] }
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 4, "w": 24, "x": 0, "y": 21 },
|
||||
"id": 4,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "center",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
||||
"textMode": "value_and_name"
|
||||
},
|
||||
"targets": [
|
||||
{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "smartctl_device_attribute{job=\"smartctl-sifaka\", attribute_name=\"Reallocated_Sector_Ct\", attribute_value_type=\"raw\"}", "legendFormat": "{{device}}", "refId": "A" }
|
||||
],
|
||||
"title": "Reallocated Sectors",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "thresholds" },
|
||||
"mappings": [],
|
||||
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 1 }, { "color": "red", "value": 10 }] }
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 4, "w": 24, "x": 0, "y": 25 },
|
||||
"id": 5,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "center",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
||||
"textMode": "value_and_name"
|
||||
},
|
||||
"targets": [
|
||||
{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "smartctl_device_attribute{job=\"smartctl-sifaka\", attribute_name=\"Current_Pending_Sector\", attribute_value_type=\"raw\"}", "legendFormat": "{{device}}", "refId": "A" }
|
||||
],
|
||||
"title": "Pending Sectors",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "thresholds" },
|
||||
"mappings": [],
|
||||
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 1 }, { "color": "red", "value": 100 }] }
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 4, "w": 24, "x": 0, "y": 29 },
|
||||
"id": 6,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "center",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
||||
"textMode": "value_and_name"
|
||||
},
|
||||
"targets": [
|
||||
{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "smartctl_device_attribute{job=\"smartctl-sifaka\", attribute_name=\"UDMA_CRC_Error_Count\", attribute_value_type=\"raw\"}", "legendFormat": "{{device}}", "refId": "A" }
|
||||
],
|
||||
"title": "UDMA CRC Errors",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "thresholds" },
|
||||
"mappings": [],
|
||||
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 1 }, { "color": "red", "value": 10 }] }
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 4, "w": 24, "x": 0, "y": 33 },
|
||||
"id": 7,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "center",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
||||
"textMode": "value_and_name"
|
||||
},
|
||||
"targets": [
|
||||
{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "smartctl_device_attribute{job=\"smartctl-sifaka\", attribute_name=\"Offline_Uncorrectable\", attribute_value_type=\"raw\"}", "legendFormat": "{{device}}", "refId": "A" }
|
||||
],
|
||||
"title": "Offline Uncorrectable Sectors",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 37 },
|
||||
"id": 103,
|
||||
"panels": [],
|
||||
"title": "Lifetime",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "thresholds" },
|
||||
"mappings": [],
|
||||
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] },
|
||||
"unit": "h"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 4, "w": 24, "x": 0, "y": 38 },
|
||||
"id": 8,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "center",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
||||
"textMode": "value_and_name"
|
||||
},
|
||||
"targets": [
|
||||
{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "smartctl_device_power_on_seconds{job=\"smartctl-sifaka\"} / 3600", "legendFormat": "{{device}}", "refId": "A" }
|
||||
],
|
||||
"title": "Power-On Hours",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "thresholds" },
|
||||
"mappings": [],
|
||||
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 4, "w": 24, "x": 0, "y": 42 },
|
||||
"id": 9,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "center",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
||||
"textMode": "value_and_name"
|
||||
},
|
||||
"targets": [
|
||||
{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "smartctl_device_power_cycle_count{job=\"smartctl-sifaka\"}", "legendFormat": "{{device}}", "refId": "A" }
|
||||
],
|
||||
"title": "Power Cycle Count",
|
||||
"type": "stat"
|
||||
}
|
||||
],
|
||||
"refresh": "1m",
|
||||
"schemaVersion": 38,
|
||||
"tags": ["sifaka", "storage", "smart"],
|
||||
"templating": { "list": [] },
|
||||
"time": { "from": "now-24h", "to": "now" },
|
||||
"timepicker": {},
|
||||
"timezone": "browser",
|
||||
"title": "Sifaka Disk Health",
|
||||
"uid": "sifaka-disk-health",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
|
|
@ -19,6 +19,7 @@ resources:
|
|||
- dashboards/configmap-zot.yaml
|
||||
- dashboards/configmap-docs-apm.yaml
|
||||
- dashboards/configmap-flyio.yaml
|
||||
- dashboards/configmap-sifaka-disks.yaml
|
||||
# TeslaMate dashboards
|
||||
- dashboards/configmap-teslamate-overview.yaml
|
||||
- dashboards/configmap-teslamate-charges.yaml
|
||||
|
|
|
|||
|
|
@ -13,12 +13,15 @@ data:
|
|||
# K8s services are scraped directly
|
||||
|
||||
scrape_configs:
|
||||
# Sifaka NAS node-exporter (via LAN - Docker NATs through indri)
|
||||
# Using LAN IP since k8s pods can reach LAN via Docker NAT (same as NFS mounts)
|
||||
# If IP changes, fallback: create Tailscale egress in tailscale-operator/egress-sifaka.yaml
|
||||
# Sifaka NAS exporters (via Caddy L4 TCP proxy on indri)
|
||||
- job_name: "node-exporter-sifaka"
|
||||
static_configs:
|
||||
- targets: ["192.168.1.203:9100"]
|
||||
- targets: ["nas.ops.eblu.me:9100"]
|
||||
|
||||
- job_name: "smartctl-sifaka"
|
||||
scrape_interval: 60s
|
||||
static_configs:
|
||||
- targets: ["nas.ops.eblu.me:9633"]
|
||||
|
||||
# CNPG PostgreSQL metrics (k8s internal)
|
||||
- job_name: "cnpg-postgres"
|
||||
|
|
|
|||
|
|
@ -0,0 +1 @@
|
|||
Add SMART disk health monitoring for sifaka NAS with smartctl_exporter, Grafana dashboard, Ansible playbook, and Caddy L4 routing via ops.eblu.me.
|
||||
|
|
@ -62,6 +62,8 @@ DNS CNAMEs point to `blumeops-proxy.fly.dev`. TLS via Fly.io-managed Let's Encry
|
|||
| 443 | Caddy | HTTPS | 0.0.0.0 | Reverse proxy |
|
||||
| 2222 | Caddy L4 | TCP | 0.0.0.0 | SSH proxy to Forgejo |
|
||||
| 5432 | Caddy L4 | TCP | 0.0.0.0 | PostgreSQL proxy |
|
||||
| 9100 | Caddy L4 | TCP | 0.0.0.0 | Sifaka node_exporter proxy |
|
||||
| 9633 | Caddy L4 | TCP | 0.0.0.0 | Sifaka smartctl_exporter proxy |
|
||||
| 2200 | Forgejo SSH | TCP | localhost | Built-in SSH server |
|
||||
| 3001 | Forgejo | HTTP | localhost | Web UI |
|
||||
| 5050 | Zot | HTTP | localhost | Registry API |
|
||||
|
|
|
|||
|
|
@ -13,8 +13,8 @@ Synology NAS providing network storage and backup target.
|
|||
| Property | Value |
|
||||
|----------|-------|
|
||||
| **Dashboard** | https://nas.ops.eblu.me |
|
||||
| **Model** | Synology |
|
||||
| **Storage** | 10.9TB RAID 5 |
|
||||
| **Model** | Synology DS423+ (DSM 7) |
|
||||
| **Storage** | 10.9TB RAID 5 (4x Seagate IronWolf 4TB, ST4000VN006) |
|
||||
| **Role** | Backup target, media storage |
|
||||
|
||||
## Network Shares
|
||||
|
|
@ -37,7 +37,70 @@ Synology NAS providing network storage and backup target.
|
|||
|
||||
## Monitoring
|
||||
|
||||
Node exporter running in Docker container, scraped by [[prometheus]] at `sifaka:9100`.
|
||||
Prometheus exporters run as Docker containers, managed by Ansible (`mise run provision-sifaka`).
|
||||
|
||||
| Exporter | Port | Purpose |
|
||||
|----------|------|---------|
|
||||
| node_exporter | 9100 | System metrics (CPU, memory, disk I/O) |
|
||||
| smartctl_exporter | 9633 | SMART disk health data |
|
||||
|
||||
Scraped by [[prometheus]] via Caddy L4 TCP proxy at `nas.ops.eblu.me:9100` and `nas.ops.eblu.me:9633`. Dashboard: [[grafana]] > Sifaka Disk Health.
|
||||
|
||||
## First-Time Setup
|
||||
|
||||
These steps were performed once to enable Ansible provisioning. They are documented here for reference if sifaka is ever replaced or reset.
|
||||
|
||||
### 1. Enable SSH
|
||||
|
||||
DSM Control Panel > Terminal & SNMP > Enable SSH service (port 22).
|
||||
|
||||
### 2. SSH Key Authentication
|
||||
|
||||
From a tailnet client with an existing SSH key:
|
||||
|
||||
```bash
|
||||
ssh-copy-id eblume@sifaka # uses password auth initially
|
||||
```
|
||||
|
||||
Synology requires strict permissions on the home directory. On sifaka:
|
||||
|
||||
```bash
|
||||
chmod 755 ~ # DSM defaults to 777; SSH refuses keys otherwise
|
||||
chmod 700 ~/.ssh
|
||||
chmod 600 ~/.ssh/authorized_keys
|
||||
```
|
||||
|
||||
Home directory path: `/var/services/homes/eblume`.
|
||||
|
||||
### 3. Passwordless Sudo for Docker
|
||||
|
||||
Ansible needs `become: true` for Docker commands. Create a sudoers drop-in:
|
||||
|
||||
```bash
|
||||
sudo vi /etc/sudoers.d/docker-ansible
|
||||
```
|
||||
|
||||
Contents:
|
||||
|
||||
```
|
||||
eblume ALL=(ALL) NOPASSWD: /volume1/@appstore/ContainerManager/usr/bin/docker
|
||||
```
|
||||
|
||||
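This limits passwordless sudo to the Docker binary. Note that Docker access is effectively root-equivalent — a privileged container (such as the smartctl exporter) can reach host devices and mount the host filesystem — so protect this account's SSH key accordingly.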
|
||||
|
||||
### 4. Docker Path
|
||||
|
||||
Synology installs Docker via Container Manager at a non-standard path:
|
||||
|
||||
```
|
||||
/volume1/@appstore/ContainerManager/usr/bin/docker
|
||||
```
|
||||
|
||||
This is configured in the `sifaka_exporters` role defaults.
|
||||
|
||||
### 5. Synology Device Naming
|
||||
|
||||
Synology uses `/dev/sata*` (e.g., `/dev/sata1` through `/dev/sata4`) instead of the standard `/dev/sd*` naming. The `smartctl_exporter` cannot auto-detect these devices, so they are passed explicitly via `--smartctl.device=` flags in the Ansible role.
|
||||
|
||||
## Tailscale
|
||||
|
||||
|
|
|
|||
9
mise-tasks/provision-sifaka
Executable file
9
mise-tasks/provision-sifaka
Executable file
|
|
@ -0,0 +1,9 @@
|
|||
#!/usr/bin/env bash
#MISE description="Run ansible playbook to provision sifaka"

# Abort on the first failing command, on unset variables, and on failures
# anywhere in a pipeline.
set -euo pipefail

# Presumably read by mise to interleave task output — confirm against mise docs.
export MISE_TASK_OUTPUT=interleave

# Relative path: assumes the task is started from the repository root — TODO confirm.
cd ansible
# Any extra command-line arguments are forwarded unchanged to ansible-playbook.
ansible-playbook playbooks/sifaka.yml "$@"
|
||||
Loading…
Add table
Add a link
Reference in a new issue