blumeops/argocd/manifests/grafana/deployment.yaml
Erich Blume 2fae0f7161 C0: switch grafana deployment to Recreate strategy
Grafana uses an RWO PVC for SQLite + Bleve search index. RollingUpdate
spawns the new pod before terminating the old one, so the new pod
crashloops on the index lock until rollout timeout. Recreate terminates
the old pod first, letting the new pod acquire the lock cleanly.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-19 06:33:26 -07:00

318 lines
12 KiB
YAML

---
# Single-replica Grafana Deployment in the "monitoring" namespace.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: grafana
  namespace: monitoring
  labels:
    app.kubernetes.io/instance: grafana
    app.kubernetes.io/name: grafana
spec:
  replicas: 1
  revisionHistoryLimit: 10
  # Recreate instead of RollingUpdate: the data volume is an RWO PVC holding
  # the SQLite database and the Bleve index. A rolling update would start the
  # replacement pod while the old one still holds the index lock, and the new
  # pod would crashloop on it.
  strategy:
    type: Recreate
  # Must match the pod template labels below.
  selector:
    matchLabels:
      app.kubernetes.io/instance: grafana
      app.kubernetes.io/name: grafana
  template:
    metadata:
      annotations:
        # Makes "grafana" the container kubectl targets when none is named.
        kubectl.kubernetes.io/default-container: grafana
      labels:
        app.kubernetes.io/instance: grafana
        app.kubernetes.io/name: grafana
    spec:
      serviceAccountName: grafana
      # Token is mounted; presumably needed by the dashboard sidecar
      # containers in this pod to read ConfigMaps — confirm before disabling.
      automountServiceAccountToken: true
      # Pod-wide default identity: non-root UID/GID 472, with volumes
      # group-owned by 472. Individual containers may override this.
      securityContext:
        runAsNonRoot: true
        runAsUser: 472
        runAsGroup: 472
        fsGroup: 472
initContainers:
# Recursively hands ownership of the Grafana data volume to 472:472 (the
# pod-level runAsUser/runAsGroup) before the other containers start.
- name: init-chown-data
  image: docker.io/library/busybox:kustomized
  imagePullPolicy: IfNotPresent
  command: ["chown", "-R", "472:472", "/var/lib/grafana"]
  securityContext:
    # Overrides the pod-level runAsNonRoot/runAsUser so chown runs as root.
    runAsNonRoot: false
    runAsUser: 0
    # Matches every other container in this pod; chown needs no setuid
    # binaries or additional privileges beyond what it starts with.
    allowPrivilegeEscalation: false
    capabilities:
      add: ["CHOWN"]
      # NOTE(review): the runtime's default capability set is kept on purpose.
      # Dropping ALL could break the recursive walk on directories root
      # cannot otherwise traverse — verify before tightening.
    seccompProfile:
      type: RuntimeDefault
  volumeMounts:
    - name: storage
      mountPath: /var/lib/grafana
# Fetch TeslaMate dashboards from forge mirror at a pinned tag.
# To upgrade: update TESLAMATE_VERSION below.
# Files land in the shared dashboard volume under /tmp/dashboards/TeslaMate.
- name: init-teslamate-dashboards
  image: docker.io/library/alpine:kustomized
  imagePullPolicy: IfNotPresent
  command: ["sh", "-c"]
  args:
    - |
      # -e: abort on the first failed download; -u: abort on an unset variable.
      set -eu
      TESLAMATE_VERSION="v3.0.0"
      BASE_URL="https://forge.ops.eblu.me/mirrors/teslamate/raw/tag/${TESLAMATE_VERSION}/grafana/dashboards"
      DEST="/tmp/dashboards/TeslaMate"
      mkdir -p "$DEST"
      for f in \
        battery-health.json \
        charge-level.json \
        charges.json \
        charging-stats.json \
        drive-stats.json \
        drives.json \
        efficiency.json \
        locations.json \
        mileage.json \
        overview.json \
        projected-range.json \
        states.json \
        statistics.json \
        timeline.json \
        trip.json \
        updates.json \
        vampire-drain.json \
        visited.json \
      ; do
        # -T bounds each network wait so a stalled mirror cannot hang the
        # init container (and therefore the pod) indefinitely.
        wget -q -T 30 -O "$DEST/$f" "$BASE_URL/$f"
      done
      # Stamp stable top-level UIDs so stars/bookmarks survive pod restarts.
      # Match root-level uid (exactly 2-space indent) to avoid clobbering the
      # more deeply indented datasource refs.
      for f in "$DEST"/*.json; do
        uid="teslamate-$(basename "$f" .json)"
        sed -i "s/^  \"uid\": *\"[^\"]*\"/  \"uid\": \"${uid}\"/" "$f"
        # Non-fatal: surface files where nothing matched (e.g. upstream
        # changed its JSON formatting) instead of silently keeping their uid.
        grep -q "^  \"uid\": \"${uid}\"" "$f" \
          || echo "WARN: no root-level uid stamped in $f" >&2
      done
      echo "Fetched $(ls "$DEST" | wc -l) TeslaMate dashboards"
  securityContext:
    allowPrivilegeEscalation: false
    capabilities:
      drop: ["ALL"]
    seccompProfile:
      type: RuntimeDefault
  volumeMounts:
    - name: sc-dashboard-volume
      mountPath: /tmp/dashboards
# Fetch UnPoller (UniFi) dashboards from forge mirror.
# Source: github.com/unpoller/dashboards (v2.0.0 Prometheus set)
# Files land in the shared dashboard volume under /tmp/dashboards/UniFi.
# NOTE(review): the URL tracks branch/master rather than a tag, so the fetched
# content can change between pod restarts — consider pinning a tag or commit.
- name: init-unpoller-dashboards
  image: docker.io/library/alpine:kustomized
  imagePullPolicy: IfNotPresent
  command: ["sh", "-c"]
  args:
    - |
      # -e: abort on the first failed command; -u: abort on an unset variable.
      set -eu
      BASE_URL="https://forge.ops.eblu.me/mirrors/unpoller-dashboards/raw/branch/master/v2.0.0"
      DEST="/tmp/dashboards/UniFi"
      mkdir -p "$DEST"
      # DPI dashboard omitted — requires DPI enabled on both UX7 and UnPoller
      for f in \
        "UniFi-Poller_ Client Insights - Prometheus.json" \
        "UniFi-Poller_ Network Sites - Prometheus.json" \
        "UniFi-Poller_ UAP Insights - Prometheus.json" \
        "UniFi-Poller_ USG Insights - Prometheus.json" \
        "UniFi-Poller_ USW Insights - Prometheus.json" \
      ; do
        # File names contain spaces: percent-encode them for the URL only.
        # -T bounds each network wait so a stalled mirror cannot hang the pod.
        wget -q -T 30 -O "$DEST/$f" "$BASE_URL/$(echo "$f" | sed 's/ /%20/g')"
      done
      # Fix datasource UIDs to match our Prometheus instance
      sed -i 's/"uid": *"bdkj55oguty4gd"/"uid": "prometheus"/g' "$DEST"/*.json
      sed -i 's/"uid": *"\${DS_PROMETHEUS}"/"uid": "prometheus"/g' "$DEST"/*.json
      # Stamp stable top-level UIDs so stars/bookmarks survive pod restarts.
      # Match root-level uid (exactly 2-space indent) to avoid clobbering the
      # more deeply indented datasource refs.
      # UIDs must be ≤40 chars (Grafana 12+ enforcement).
      for f in "$DEST"/*.json; do
        # Slug: lowercase, non-alphanumerics to "-", collapse runs of "-",
        # trim leading/trailing "-", then drop the "unifi-poller-" prefix.
        slug=$(basename "$f" .json \
          | tr '[:upper:]' '[:lower:]' \
          | sed -e 's/[^a-z0-9]/-/g' -e 's/--*/-/g' -e 's/^-//;s/-$//' -e 's/^unifi-poller-//')
        uid="unpoller-${slug}"
        sed -i "s/^  \"uid\": *\"[^\"]*\"/  \"uid\": \"${uid}\"/" "$f"
        # Non-fatal: surface files where nothing matched (e.g. upstream
        # changed its JSON formatting) instead of silently keeping their uid.
        grep -q "^  \"uid\": \"${uid}\"" "$f" \
          || echo "WARN: no root-level uid stamped in $f" >&2
      done
      echo "Fetched $(ls "$DEST" | wc -l) UnPoller dashboards"
  securityContext:
    allowPrivilegeEscalation: false
    capabilities:
      drop: ["ALL"]
    seccompProfile:
      type: RuntimeDefault
  volumeMounts:
    - name: sc-dashboard-volume
      mountPath: /tmp/dashboards
# One-shot (METHOD=LIST) run of the dashboard sidecar image, so that
# ConfigMap-sourced dashboards are already on disk when Grafana boots.
# Why: otherwise Grafana's provisioner can scan the folder before the
# long-running sidecar has written anything; it then deletes the existing DB
# records and re-creates them under new IDs, which breaks starred dashboards.
- name: init-configmap-dashboards
  image: registry.ops.eblu.me/blumeops/grafana-sidecar:kustomized
  imagePullPolicy: IfNotPresent
  securityContext:
    allowPrivilegeEscalation: false
    seccompProfile:
      type: RuntimeDefault
    capabilities:
      drop:
        - ALL
  volumeMounts:
    - name: sc-dashboard-volume
      mountPath: /tmp/dashboards
  env:
    - name: METHOD
      value: LIST
    # Select resources labelled grafana_dashboard=1.
    - name: LABEL
      value: grafana_dashboard
    - name: LABEL_VALUE
      value: "1"
    # Output directory (the shared dashboard volume mounted above).
    - name: FOLDER
      value: /tmp/dashboards
    # ConfigMaps only: no dashboards come from Secrets, so the
    # ServiceAccount is not granted read access to secrets.
    - name: RESOURCE
      value: configmap
    - name: FOLDER_ANNOTATION
      value: grafana_folder
containers:
# Long-running dashboard sidecar (METHOD=WATCH): follows ConfigMaps labelled
# grafana_dashboard=1 and writes them into the shared dashboard volume.
- name: grafana-sc-dashboard
  image: registry.ops.eblu.me/blumeops/grafana-sidecar:kustomized
  imagePullPolicy: IfNotPresent
  securityContext:
    allowPrivilegeEscalation: false
    seccompProfile:
      type: RuntimeDefault
    capabilities:
      drop:
        - ALL
  volumeMounts:
    - name: sc-dashboard-volume
      mountPath: /tmp/dashboards
  # Both probes hit the same /healthz endpoint on port 8080; readiness is
  # checked sooner and more often than liveness.
  readinessProbe:
    initialDelaySeconds: 5
    periodSeconds: 10
    httpGet:
      path: /healthz
      port: 8080
  livenessProbe:
    initialDelaySeconds: 10
    periodSeconds: 30
    httpGet:
      path: /healthz
      port: 8080
  env:
    - name: METHOD
      value: WATCH
    - name: LABEL
      value: grafana_dashboard
    - name: LABEL_VALUE
      value: "1"
    - name: FOLDER
      value: /tmp/dashboards
    - name: RESOURCE
      value: configmap
    - name: FOLDER_ANNOTATION
      value: grafana_folder
    # REQ_* settings: presumably the sidecar issues this authenticated POST
    # to Grafana's provisioning-reload endpoint after it syncs — confirm
    # against the sidecar image's documentation.
    - name: REQ_USERNAME
      valueFrom:
        secretKeyRef:
          name: grafana-admin
          key: admin-user
    - name: REQ_PASSWORD
      valueFrom:
        secretKeyRef:
          name: grafana-admin
          key: admin-password
    - name: REQ_URL
      value: http://localhost:3000/api/admin/provisioning/dashboards/reload
    - name: REQ_METHOD
      value: POST
# Grafana server container, serving HTTP on port 3000.
- name: grafana
  image: registry.ops.eblu.me/blumeops/grafana:kustomized
  imagePullPolicy: IfNotPresent
  ports:
    - name: http
      containerPort: 3000
      protocol: TCP
  resources:
    requests:
      cpu: 100m
      memory: 128Mi
    limits:
      cpu: 500m
      memory: 512Mi
  securityContext:
    allowPrivilegeEscalation: false
    seccompProfile:
      type: RuntimeDefault
    capabilities:
      drop:
        - ALL
  # Readiness uses the probe defaults; liveness is deliberately lenient:
  # first check after 60s, up to 30s per check, 10 failures before restart.
  readinessProbe:
    httpGet:
      path: /api/health
      port: 3000
  livenessProbe:
    initialDelaySeconds: 60
    timeoutSeconds: 30
    failureThreshold: 10
    httpGet:
      path: /api/health
      port: 3000
  env:
    - name: POD_IP
      valueFrom:
        fieldRef:
          fieldPath: status.podIP
    # Admin credentials come from the grafana-admin Secret.
    - name: GF_SECURITY_ADMIN_USER
      valueFrom:
        secretKeyRef:
          name: grafana-admin
          key: admin-user
    - name: GF_SECURITY_ADMIN_PASSWORD
      valueFrom:
        secretKeyRef:
          name: grafana-admin
          key: admin-password
    # Filesystem layout; data and plugins live on the persistent volume
    # mounted at /var/lib/grafana.
    - name: GF_PATHS_DATA
      value: /var/lib/grafana/
    - name: GF_PATHS_LOGS
      value: /var/log/grafana
    - name: GF_PATHS_PLUGINS
      value: /var/lib/grafana/plugins
    - name: GF_PATHS_PROVISIONING
      value: /etc/grafana/provisioning
  # Extra environment from Secrets; both are optional, so the container
  # still starts when either Secret does not exist.
  envFrom:
    - secretRef:
        name: grafana-teslamate-datasource
        optional: true
    - secretRef:
        name: grafana-authentik-oauth
        optional: true
  volumeMounts:
    # Individual files from the "config" volume, mounted via subPath.
    - name: config
      mountPath: /etc/grafana/grafana.ini
      subPath: grafana.ini
    - name: config
      mountPath: /etc/grafana/provisioning/datasources/datasources.yaml
      subPath: datasources.yaml
    - name: config
      mountPath: /etc/grafana/provisioning/alerting/alerting.yaml
      subPath: alerting.yaml
    # Persistent data directory.
    - name: storage
      mountPath: /var/lib/grafana
    # Dashboard JSON written by the init containers and the sidecar.
    - name: sc-dashboard-volume
      mountPath: /tmp/dashboards
    # Dashboard provider definition, mounted as a single file.
    - name: sc-dashboard-provider
      mountPath: /etc/grafana/provisioning/dashboards/sc-dashboardproviders.yaml
      subPath: provider.yaml
# Pod volumes referenced by the containers' volumeMounts.
volumes:
  # grafana.ini plus datasource/alerting provisioning files.
  - name: config
    configMap:
      name: grafana
  # Persistent Grafana data directory (PVC "grafana").
  - name: storage
    persistentVolumeClaim:
      claimName: grafana
  # Scratch space for dashboard JSON; emptyDir, so it starts empty for every
  # new pod and is refilled by the init containers and the sidecar.
  - name: sc-dashboard-volume
    emptyDir: {}
  # Supplies provider.yaml for dashboard provisioning.
  - name: sc-dashboard-provider
    configMap:
      name: grafana-config-dashboards