From 4e16116c4fc1a782fb991424cd122c42420484bf Mon Sep 17 00:00:00 2001
From: Erich Blume
Date: Thu, 19 Feb 2026 11:41:47 -0800
Subject: [PATCH] Port Frigate NVR to ringtail k3s with GPU acceleration

Migrate Frigate from indri's minikube (arm64, ZMQ detector) to ringtail's k3s
cluster to leverage the RTX 4080 for TensorRT-accelerated ONNX inference.

- Enable nvidia-container-toolkit and configure k3s containerd nvidia runtime
- Add NVIDIA device plugin ArgoCD app (RuntimeClass + DaemonSet)
- Re-target Frigate ArgoCD app to ringtail k3s cluster
- Switch image to x86_64 tensorrt variant with runtimeClassName: nvidia
- Add GPU resource limit (nvidia.com/gpu: 1) and increase shm to 512Mi
- Replace ZMQ detector with ONNX (auto-selects TensorRT execution provider)
- Update NFS PV and database PVC comments for ringtail

Co-Authored-By: Claude Opus 4.6
---
Review notes (everything between the "---" line above and the first
"diff --git" line is ignored by `git am`):

* daemonset.yaml now sets `runtimeClassName: nvidia` on the device plugin pod.
  This patch registers "nvidia" only as an additional containerd runtime, so
  without the field the plugin runs under the default runtime, has no NVML
  library, and advertises no nvidia.com/gpu resources. The Frigate pod, which
  requests nvidia.com/gpu: "1", would then stay Pending.
  The diffstat and the hunk header for that file were updated (36 -> 38 lines).
* NOTE(review): the post-image blob id 50eb94e on the daemonset.yaml "index"
  line predates that change and is stale; regenerate the patch with
  `git format-patch` if exact blob ids matter.
* NOTE(review): this patch was recovered from a copy whose line breaks and
  leading indentation had been collapsed. Line counts were checked against
  every hunk header, but indentation and blank-line positions are reconstructed
  with conventional 2-space YAML/Nix style. Confirm the context lines against
  the target files before applying.
* NOTE(review): the author and co-author e-mail addresses were missing from
  the recovered copy and have not been guessed.

 argocd/apps/frigate.yaml                      |  2 +-
 argocd/apps/nvidia-device-plugin.yaml         | 18 ++++++++++
 argocd/manifests/frigate/configmap.yaml       |  5 ++-
 argocd/manifests/frigate/deployment.yaml      |  6 ++--
 argocd/manifests/frigate/pv-nfs.yaml          |  4 +--
 argocd/manifests/frigate/pvc-database.yaml    |  2 +-
 .../nvidia-device-plugin/daemonset.yaml       | 38 +++++++++++++++++++
 .../nvidia-device-plugin/runtime-class.yaml   |  6 ++++
 .../feature-frigate-ringtail-gpu.infra.md     |  1 +
 nixos/ringtail/configuration.nix              | 12 +++++++
 10 files changed, 85 insertions(+), 9 deletions(-)
 create mode 100644 argocd/apps/nvidia-device-plugin.yaml
 create mode 100644 argocd/manifests/nvidia-device-plugin/daemonset.yaml
 create mode 100644 argocd/manifests/nvidia-device-plugin/runtime-class.yaml
 create mode 100644 docs/changelog.d/feature-frigate-ringtail-gpu.infra.md

diff --git a/argocd/apps/frigate.yaml b/argocd/apps/frigate.yaml
index a90f412..c443774 100644
--- a/argocd/apps/frigate.yaml
+++ b/argocd/apps/frigate.yaml
@@ -11,7 +11,7 @@ spec:
     targetRevision: main
     path: argocd/manifests/frigate
   destination:
-    server: https://kubernetes.default.svc
+    server: https://ringtail.tail8d86e.ts.net:6443
     namespace: frigate
   syncPolicy:
     syncOptions:
diff --git a/argocd/apps/nvidia-device-plugin.yaml b/argocd/apps/nvidia-device-plugin.yaml
new file mode 100644
index 0000000..af8395f
--- /dev/null
+++ b/argocd/apps/nvidia-device-plugin.yaml
@@ -0,0 +1,18 @@
+---
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: nvidia-device-plugin
+  namespace: argocd
+spec:
+  project: default
+  source:
+    repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git
+    targetRevision: main
+    path: argocd/manifests/nvidia-device-plugin
+  destination:
+    server: https://ringtail.tail8d86e.ts.net:6443
+    namespace: nvidia-device-plugin
+  syncPolicy:
+    syncOptions:
+      - CreateNamespace=true
diff --git a/argocd/manifests/frigate/configmap.yaml b/argocd/manifests/frigate/configmap.yaml
index 9c96008..df7c70c 100644
--- a/argocd/manifests/frigate/configmap.yaml
+++ b/argocd/manifests/frigate/configmap.yaml
@@ -56,9 +56,8 @@ data:
       track: [person, car, dog, cat, bird]

     detectors:
-      apple_silicon:
-        type: zmq
-        endpoint: tcp://host.minikube.internal:5555
+      onnx:
+        type: onnx

     model:
       model_type: yolo-generic
diff --git a/argocd/manifests/frigate/deployment.yaml b/argocd/manifests/frigate/deployment.yaml
index afe11fb..91c9a77 100644
--- a/argocd/manifests/frigate/deployment.yaml
+++ b/argocd/manifests/frigate/deployment.yaml
@@ -23,9 +23,10 @@ spec:
               mountPath: /config-ro
             - name: config
               mountPath: /config
+      runtimeClassName: nvidia
       containers:
         - name: frigate
-          image: ghcr.io/blakeblackshear/frigate:0.17.0-rc2-standard-arm64
+          image: ghcr.io/blakeblackshear/frigate:0.17.0-rc2-tensorrt
           ports:
             - containerPort: 5000
               name: http
@@ -60,6 +61,7 @@ spec:
             limits:
               memory: "2Gi"
               cpu: "2000m"
+              nvidia.com/gpu: "1"
           livenessProbe:
             httpGet:
               path: /api/version
@@ -87,4 +89,4 @@ spec:
         - name: shm
           emptyDir:
             medium: Memory
-            sizeLimit: 256Mi
+            sizeLimit: 512Mi
diff --git a/argocd/manifests/frigate/pv-nfs.yaml b/argocd/manifests/frigate/pv-nfs.yaml
index d3a592b..c7197ab 100644
--- a/argocd/manifests/frigate/pv-nfs.yaml
+++ b/argocd/manifests/frigate/pv-nfs.yaml
@@ -1,11 +1,11 @@
 # NFS PersistentVolume for Frigate recordings
-# Requires: NFS share on sifaka at /volume1/frigate with NFS permissions for indri
+# Requires: NFS share on sifaka at /volume1/frigate with NFS permissions for ringtail
 #
 # To create on Synology:
 # 1. Control Panel > Shared Folder > Create
 # 2. Name: frigate, Location: Volume 1
 # 3. Control Panel > File Services > NFS > NFS Rules
-# 4. Add rule for "frigate" share: Hostname=indri, Privilege=Read/Write, Squash=No mapping
+# 4. Add rule for "frigate" share: Hostname=ringtail, Privilege=Read/Write, Squash=No mapping
 apiVersion: v1
 kind: PersistentVolume
 metadata:
diff --git a/argocd/manifests/frigate/pvc-database.yaml b/argocd/manifests/frigate/pvc-database.yaml
index 040bda3..1eacb1d 100644
--- a/argocd/manifests/frigate/pvc-database.yaml
+++ b/argocd/manifests/frigate/pvc-database.yaml
@@ -1,5 +1,5 @@
 # PersistentVolumeClaim for Frigate SQLite database
-# Uses minikube's default storage class for local provisioning
+# Uses k3s local-path storage class for local provisioning
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
diff --git a/argocd/manifests/nvidia-device-plugin/daemonset.yaml b/argocd/manifests/nvidia-device-plugin/daemonset.yaml
new file mode 100644
index 0000000..50eb94e
--- /dev/null
+++ b/argocd/manifests/nvidia-device-plugin/daemonset.yaml
@@ -0,0 +1,38 @@
+---
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: nvidia-device-plugin
+  namespace: nvidia-device-plugin
+  labels:
+    app: nvidia-device-plugin
+spec:
+  selector:
+    matchLabels:
+      app: nvidia-device-plugin
+  template:
+    metadata:
+      labels:
+        app: nvidia-device-plugin
+    spec:
+      tolerations:
+        - key: nvidia.com/gpu
+          operator: Exists
+          effect: NoSchedule
+      priorityClassName: system-node-critical
+      # nvidia is not the default containerd runtime; the plugin needs it to get NVML injected.
+      runtimeClassName: nvidia
+      containers:
+        - name: nvidia-device-plugin
+          image: nvcr.io/nvidia/k8s-device-plugin:v0.17.0
+          securityContext:
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop: ["ALL"]
+          volumeMounts:
+            - name: device-plugins
+              mountPath: /var/lib/kubelet/device-plugins
+      volumes:
+        - name: device-plugins
+          hostPath:
+            path: /var/lib/kubelet/device-plugins
diff --git a/argocd/manifests/nvidia-device-plugin/runtime-class.yaml b/argocd/manifests/nvidia-device-plugin/runtime-class.yaml
new file mode 100644
index 0000000..7ba6add
--- /dev/null
+++ b/argocd/manifests/nvidia-device-plugin/runtime-class.yaml
@@ -0,0 +1,6 @@
+---
+apiVersion: node.k8s.io/v1
+kind: RuntimeClass
+metadata:
+  name: nvidia
+handler: nvidia
diff --git a/docs/changelog.d/feature-frigate-ringtail-gpu.infra.md b/docs/changelog.d/feature-frigate-ringtail-gpu.infra.md
new file mode 100644
index 0000000..d204a4f
--- /dev/null
+++ b/docs/changelog.d/feature-frigate-ringtail-gpu.infra.md
@@ -0,0 +1 @@
+Port Frigate NVR to ringtail k3s with RTX 4080 GPU acceleration (TensorRT/ONNX), replacing the ZMQ-based Apple Silicon detector on indri.
diff --git a/nixos/ringtail/configuration.nix b/nixos/ringtail/configuration.nix
index 6cb0581..0882fe5 100644
--- a/nixos/ringtail/configuration.nix
+++ b/nixos/ringtail/configuration.nix
@@ -35,6 +35,9 @@ in
     package = config.boot.kernelPackages.nvidiaPackages.stable;
   };

+  # NVIDIA container toolkit (CDI specs + runtime for containerd/k3s GPU pods)
+  hardware.nvidia-container-toolkit.enable = true;
+
   # Wayland / Sway
   programs.sway = {
     enable = true;
@@ -109,6 +112,15 @@ in
       "--write-kubeconfig-mode=644"
       "--tls-san=ringtail.tail8d86e.ts.net"
     ];
+    containerdConfigTemplate = ''
+      {{ template "base" . }}
+
+      [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia]
+        privileged_without_host_devices = false
+        runtime_type = "io.containerd.runc.v2"
+      [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options]
+        BinaryName = "${pkgs.nvidia-container-toolkit}/bin/nvidia-container-runtime"
+    '';
   };

   # K3s containerd registry mirrors (pull through Zot on indri)