Port Frigate NVR to ringtail k3s with GPU acceleration (#217)

## Summary

- Enable NVIDIA container toolkit on ringtail NixOS and configure k3s containerd with nvidia runtime
- Add NVIDIA device plugin ArgoCD app (RuntimeClass + DaemonSet) to expose `nvidia.com/gpu` resources
- Re-target Frigate from indri minikube (arm64, ZMQ detector) to ringtail k3s (x86_64, TensorRT/ONNX)
- Switch Frigate image to `-tensorrt` variant with GPU resource limits and increased shared memory

## Manual Prerequisites

1. **NFS access**: Verify ringtail can mount `sifaka:/volume1/frigate`
   ```fish
   ssh ringtail 'sudo mount -t nfs sifaka:/volume1/frigate /mnt/storage1 && ls /mnt/storage1 && sudo umount /mnt/storage1'
   ```
2. **YOLO model**: Verify `/volume1/frigate/models/yolov9m.onnx` exists on sifaka

## Deployment Steps

1. Provision ringtail: `mise run provision-ringtail`
2. Sync ArgoCD apps: `argocd app sync apps --prune`
3. Deploy NVIDIA device plugin: `argocd app sync nvidia-device-plugin`
4. Verify GPU: `kubectl --context=k3s-ringtail get nodes -o json | jq '.items[].status.capacity'`
5. Deploy Frigate: `argocd app sync frigate`

## Verification

- [ ] `nvidia.com/gpu: 1` visible in node capacity
- [ ] Frigate pod running with GPU allocated
- [ ] Frigate UI loads at `https://nvr.ops.eblu.me`
- [ ] Detector shows ONNX/TensorRT on System page
- [ ] Camera feed with bounding boxes in live view
- [ ] TensorRT engine build completes (watch logs on first start)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Reviewed-on: https://forge.ops.eblu.me/eblume/blumeops/pulls/217
This commit is contained in:
Erich Blume 2026-02-19 14:27:04 -08:00
commit d5d32fe91f
14 changed files with 157 additions and 12 deletions

View file

@ -11,7 +11,7 @@ spec:
targetRevision: main
path: argocd/manifests/frigate
destination:
server: https://kubernetes.default.svc
server: https://ringtail.tail8d86e.ts.net:6443
namespace: frigate
syncPolicy:
syncOptions:

View file

@ -0,0 +1,18 @@
---
# Argo CD Application that deploys the manifests under
# argocd/manifests/nvidia-device-plugin to the ringtail k3s API server.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: nvidia-device-plugin
  namespace: argocd
spec:
  project: default
  source:
    repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git
    targetRevision: main
    path: argocd/manifests/nvidia-device-plugin
  destination:
    # Remote cluster (ringtail), not the cluster Argo CD itself runs in.
    server: https://ringtail.tail8d86e.ts.net:6443
    namespace: nvidia-device-plugin
  syncPolicy:
    syncOptions:
      # Let Argo CD create the destination namespace on first sync.
      - CreateNamespace=true

View file

@ -23,6 +23,16 @@ data:
general:
title: "Frigate Alert"
zones:
unzoned: drop
allow:
- driveway_entrance
labels:
allow:
- person
- car
ntfy:
enabled: true
server: http://ntfy.ntfy.svc.cluster.local:80

View file

@ -56,17 +56,16 @@ data:
track: [person, car, dog, cat, bird]
detectors:
apple_silicon:
type: zmq
endpoint: tcp://host.minikube.internal:5555
onnx:
type: onnx
model:
model_type: yolo-generic
model_type: yolonas
width: 320
height: 320
input_tensor: nchw
input_dtype: float
path: /media/frigate/models/yolov9m.onnx
input_dtype: int
path: /media/frigate/models/yolo_nas_s.onnx
labelmap_path: /labelmap/coco-80.txt
record:

View file

@ -17,6 +17,9 @@ spec:
containers:
- name: frigate-notify
image: ghcr.io/0x2142/frigate-notify:v0.3.5
env:
- name: TZ
value: America/Los_Angeles
volumeMounts:
- name: config
mountPath: /app/config.yml

View file

@ -6,6 +6,8 @@ metadata:
namespace: frigate
spec:
replicas: 1
strategy:
type: Recreate
selector:
matchLabels:
app: frigate
@ -14,6 +16,7 @@ spec:
labels:
app: frigate
spec:
runtimeClassName: nvidia
initContainers:
- name: copy-config
image: busybox:1.37
@ -25,7 +28,7 @@ spec:
mountPath: /config
containers:
- name: frigate
image: ghcr.io/blakeblackshear/frigate:0.17.0-rc2-standard-arm64
image: ghcr.io/blakeblackshear/frigate:0.17.0-rc2-tensorrt
ports:
- containerPort: 5000
name: http
@ -60,6 +63,7 @@ spec:
limits:
memory: "2Gi"
cpu: "2000m"
nvidia.com/gpu: "1"
livenessProbe:
httpGet:
path: /api/version
@ -87,4 +91,4 @@ spec:
- name: shm
emptyDir:
medium: Memory
sizeLimit: 256Mi
sizeLimit: 512Mi

View file

@ -1,11 +1,11 @@
# NFS PersistentVolume for Frigate recordings
# Requires: NFS share on sifaka at /volume1/frigate with NFS permissions for indri
# Requires: NFS share on sifaka at /volume1/frigate with NFS permissions for ringtail
#
# To create on Synology:
# 1. Control Panel > Shared Folder > Create
# 2. Name: frigate, Location: Volume 1
# 3. Control Panel > File Services > NFS > NFS Rules
# 4. Add rule for "frigate" share: Hostname=indri, Privilege=Read/Write, Squash=No mapping
# 4. Add rule for "frigate" share: Hostname=ringtail, Privilege=Read/Write, Squash=No mapping
apiVersion: v1
kind: PersistentVolume
metadata:

View file

@ -1,5 +1,5 @@
# PersistentVolumeClaim for Frigate SQLite database
# Uses minikube's default storage class for local provisioning
# Uses k3s local-path storage class for local provisioning
apiVersion: v1
kind: PersistentVolumeClaim
metadata:

View file

@ -135,6 +135,17 @@ config:
# type: caddy
# url: http://indri.tail8d86e.ts.net:2019
# Services on ringtail k3s (not autodiscovered — different cluster)
- Infrastructure:
- NVR:
href: https://nvr.ops.eblu.me
icon: frigate.png
description: Network video recorder
- Ntfy:
href: https://ntfy.ops.eblu.me
icon: ntfy.png
description: Push notifications
# External bookmarks
bookmarks:
- Admin:

View file

@ -0,0 +1,51 @@
---
# NVIDIA device plugin DaemonSet.
# Runs the k8s-device-plugin container on each schedulable node and registers
# with the kubelet through the device-plugins directory mounted below.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: nvidia-device-plugin
  namespace: nvidia-device-plugin
  labels:
    app: nvidia-device-plugin
spec:
  selector:
    matchLabels:
      app: nvidia-device-plugin
  template:
    metadata:
      labels:
        app: nvidia-device-plugin
    spec:
      # Still schedule onto nodes tainted nvidia.com/gpu:NoSchedule.
      tolerations:
        - key: nvidia.com/gpu
          operator: Exists
          effect: NoSchedule
      priorityClassName: system-node-critical
      containers:
        - name: nvidia-device-plugin
          image: nvcr.io/nvidia/k8s-device-plugin:v0.18.2
          args:
            - --device-id-strategy=index
          env:
            # Points the dynamic loader at the host driver libraries mounted
            # via the nvidia-libs volume below.
            - name: LD_LIBRARY_PATH
              value: /run/nvidia/lib
          securityContext:
            privileged: true
          volumeMounts:
            - name: device-plugins
              mountPath: /var/lib/kubelet/device-plugins
            - name: cdi-specs
              mountPath: /var/run/cdi
              readOnly: true
            - name: nvidia-libs
              mountPath: /run/nvidia/lib
              readOnly: true
      volumes:
        - name: device-plugins
          hostPath:
            path: /var/lib/kubelet/device-plugins
        - name: cdi-specs
          hostPath:
            path: /var/run/cdi
        # Host path published by the ringtail NixOS config
        # (environment.etc."nvidia-driver/lib").
        - name: nvidia-libs
          hostPath:
            path: /etc/nvidia-driver/lib

View file

@ -0,0 +1,6 @@
---
# RuntimeClass selected by pods that set `runtimeClassName: nvidia`.
# The handler name must match a runtime registered in the node's containerd
# configuration (the `nvidia` runtime in the k3s containerd template).
apiVersion: node.k8s.io/v1
kind: RuntimeClass
metadata:
  name: nvidia
handler: nvidia

View file

@ -0,0 +1 @@
Port Frigate NVR to ringtail k3s with RTX 4080 GPU acceleration (TensorRT/ONNX), replacing the ZMQ-based Apple Silicon detector on indri.

View file

@ -35,6 +35,28 @@ in
package = config.boot.kernelPackages.nvidiaPackages.stable;
};
# NVIDIA container toolkit (CDI specs + runtime for containerd/k3s GPU pods)
hardware.nvidia-container-toolkit.enable = true;
# Stable path to NVIDIA driver libraries for k3s device plugin pod mounts.
# Avoids mounting all of /nix/store — only the driver derivation is needed.
environment.etc."nvidia-driver/lib".source = "${config.hardware.nvidia.package}/lib";
# Stable-path wrapper for nvidia-container-runtime.cdi (the CDI-based OCI
# runtime that injects GPU devices/libs from NixOS-generated CDI specs).
# The wrapper adds runc to PATH since k3s doesn't ship a standalone runc binary.
environment.etc."nvidia-container-runtime/nvidia-runtime-cdi-wrapper" = {
mode = "0755";
text = ''
#!/bin/sh
export PATH="${pkgs.runc}/bin:$PATH"
exec ${pkgs.nvidia-container-toolkit.tools}/bin/nvidia-container-runtime.cdi "$@"
'';
};
# NFS client support (required for k3s to mount NFS PersistentVolumes)
boot.supportedFilesystems = [ "nfs" ];
# Wayland / Sway
programs.sway = {
enable = true;
@ -109,6 +131,19 @@ in
"--write-kubeconfig-mode=644"
"--tls-san=ringtail.tail8d86e.ts.net"
];
# containerd config template: renders the "base" template first, then appends
# CDI settings (enable_cdi plus the spec directories to scan) and an extra
# runtime named "nvidia" whose BinaryName is the wrapper script at
# /etc/nvidia-container-runtime/nvidia-runtime-cdi-wrapper.
containerdConfigTemplate = ''
{{ template "base" . }}
[plugins.'io.containerd.cri.v1.runtime']
enable_cdi = true
cdi_spec_dirs = ["/var/run/cdi", "/etc/cdi"]
[plugins.'io.containerd.cri.v1.runtime'.containerd.runtimes.nvidia]
privileged_without_host_devices = false
runtime_type = "io.containerd.runc.v2"
[plugins.'io.containerd.cri.v1.runtime'.containerd.runtimes.nvidia.options]
BinaryName = "/etc/nvidia-container-runtime/nvidia-runtime-cdi-wrapper"
'';
};
# K3s containerd registry mirrors (pull through Zot on indri)

View file

@ -51,6 +51,13 @@ services:
upstream-source: https://github.com/gethomepage/homepage/releases
notes: Deployed via Helm chart
- name: nvidia-device-plugin
type: argocd
last-reviewed: 2026-02-19
current-version: "v0.18.2"
upstream-source: https://github.com/NVIDIA/k8s-device-plugin/releases
notes: DaemonSet + RuntimeClass on ringtail for GPU workloads
- name: frigate
type: argocd
last-reviewed: 2026-02-17