## Summary - Remove stale `/opt/homebrew/var/loki` from borgmatic backup (Loki migrated to k8s) - Add Alloy k8s DaemonSet for automatic pod log collection with auto-discovery - Add blackbox probes for miniflux, kiwix, transmission, devpi, argocd - Add transmission-exporter sidecar for full metrics (speed, torrent counts, ratios) - Replace stale devpi dashboard with probe-based metrics (status, response time, uptime) - Add unified "K8s Services Health" dashboard for service uptime/response monitoring ## Manual cleanup already performed - Deleted stale textfile metrics on indri: `devpi.prom`, `transmission.prom` - Deleted stale data directories on indri: `/opt/homebrew/var/loki/`, `/opt/homebrew/var/prometheus/` ## Deployment and Testing - [x] Sync `apps` application to pick up new alloy-k8s app - [x] Deploy alloy-k8s on feature branch: `argocd app set alloy-k8s --revision feature/observability-cleanup && argocd app sync alloy-k8s` - [x] Deploy torrent on feature branch (for transmission exporter): `argocd app set torrent --revision feature/observability-cleanup && argocd app sync torrent` - [x] Deploy prometheus on feature branch (for new scrape config): `argocd app set prometheus --revision feature/observability-cleanup && argocd app sync prometheus` - [x] Deploy grafana-config on feature branch (for dashboards): `argocd app set grafana-config --revision feature/observability-cleanup && argocd app sync grafana-config` - [x] Verify pod logs appear in Loki/Grafana - [x] Verify transmission metrics appear in Prometheus - [x] Verify service probe metrics appear in Prometheus - [x] Run `mise run provision-indri -- --tags borgmatic` to update borgmatic config - [ ] After merge, reset apps to main and resync 🤖 Generated with [Claude Code](https://claude.com/claude-code) Reviewed-on: https://forge.tail8d86e.ts.net/eblume/blumeops/pulls/43
67 lines
1.6 KiB
YAML
67 lines
1.6 KiB
YAML
apiVersion: apps/v1
|
|
kind: DaemonSet
|
|
metadata:
|
|
name: alloy
|
|
namespace: alloy
|
|
labels:
|
|
app: alloy
|
|
spec:
|
|
selector:
|
|
matchLabels:
|
|
app: alloy
|
|
template:
|
|
metadata:
|
|
labels:
|
|
app: alloy
|
|
spec:
|
|
serviceAccountName: alloy
|
|
securityContext:
|
|
fsGroup: 473 # alloy user group
|
|
containers:
|
|
- name: alloy
|
|
image: grafana/alloy:v1.5.1
|
|
args:
|
|
- run
|
|
- --server.http.listen-addr=0.0.0.0:12345
|
|
- --storage.path=/var/lib/alloy/data
|
|
- /etc/alloy/config.alloy
|
|
ports:
|
|
- containerPort: 12345
|
|
name: http
|
|
env:
|
|
- name: HOSTNAME
|
|
valueFrom:
|
|
fieldRef:
|
|
fieldPath: spec.nodeName
|
|
resources:
|
|
requests:
|
|
cpu: 50m
|
|
memory: 128Mi
|
|
limits:
|
|
cpu: 500m
|
|
memory: 512Mi
|
|
volumeMounts:
|
|
- name: config
|
|
mountPath: /etc/alloy
|
|
- name: varlog
|
|
mountPath: /var/log
|
|
readOnly: true
|
|
- name: data
|
|
mountPath: /var/lib/alloy/data
|
|
securityContext:
|
|
allowPrivilegeEscalation: false
|
|
readOnlyRootFilesystem: true
|
|
capabilities:
|
|
drop:
|
|
- ALL
|
|
tolerations:
|
|
- operator: Exists
|
|
volumes:
|
|
- name: config
|
|
configMap:
|
|
name: alloy-config
|
|
- name: varlog
|
|
hostPath:
|
|
path: /var/log
|
|
- name: data
|
|
emptyDir: {}
|