Port Frigate NVR to ringtail k3s with GPU acceleration (#217)
## Summary

- Enable NVIDIA container toolkit on ringtail NixOS and configure k3s containerd with nvidia runtime
- Add NVIDIA device plugin ArgoCD app (RuntimeClass + DaemonSet) to expose `nvidia.com/gpu` resources
- Re-target Frigate from indri minikube (arm64, ZMQ detector) to ringtail k3s (x86_64, TensorRT/ONNX)
- Switch Frigate image to `-tensorrt` variant with GPU resource limits and increased shared memory

## Manual Prerequisites

1. **NFS access**: Verify ringtail can mount `sifaka:/volume1/frigate`

   ```fish
   ssh ringtail 'sudo mount -t nfs sifaka:/volume1/frigate /mnt/storage1 && ls /mnt/storage1 && sudo umount /mnt/storage1'
   ```

2. **YOLO model**: Verify `/volume1/frigate/models/yolov9m.onnx` exists on sifaka

## Deployment Steps

1. Provision ringtail: `mise run provision-ringtail`
2. Sync ArgoCD apps: `argocd app sync apps --prune`
3. Deploy NVIDIA device plugin: `argocd app sync nvidia-device-plugin`
4. Verify GPU: `kubectl --context=k3s-ringtail get nodes -o json | jq '.items[].status.capacity'`
5. Deploy Frigate: `argocd app sync frigate`

## Verification

- [ ] `nvidia.com/gpu: 1` visible in node capacity
- [ ] Frigate pod running with GPU allocated
- [ ] Frigate UI loads at `https://nvr.ops.eblu.me`
- [ ] Detector shows ONNX/TensorRT on System page
- [ ] Camera feed with bounding boxes in live view
- [ ] TensorRT engine build completes (watch logs on first start)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Reviewed-on: https://forge.ops.eblu.me/eblume/blumeops/pulls/217
This commit is contained in:
parent
16a4a9a616
commit
d5d32fe91f
14 changed files with 157 additions and 12 deletions
|
|
@ -11,7 +11,7 @@ spec:
|
|||
targetRevision: main
|
||||
path: argocd/manifests/frigate
|
||||
destination:
|
||||
server: https://kubernetes.default.svc
|
||||
server: https://ringtail.tail8d86e.ts.net:6443
|
||||
namespace: frigate
|
||||
syncPolicy:
|
||||
syncOptions:
|
||||
|
|
|
|||
18
argocd/apps/nvidia-device-plugin.yaml
Normal file
18
argocd/apps/nvidia-device-plugin.yaml
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
---
# ArgoCD Application that deploys the manifests under
# argocd/manifests/nvidia-device-plugin to the ringtail k3s cluster.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: nvidia-device-plugin
  namespace: argocd
spec:
  project: default
  source:
    repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git
    targetRevision: main
    path: argocd/manifests/nvidia-device-plugin
  destination:
    # Remote cluster API endpoint (ringtail), not the in-cluster
    # https://kubernetes.default.svc address.
    server: https://ringtail.tail8d86e.ts.net:6443
    namespace: nvidia-device-plugin
  syncPolicy:
    # No `automated` policy is set here, so syncs are triggered manually
    # (e.g. `argocd app sync nvidia-device-plugin`).
    syncOptions:
      # Let ArgoCD create the destination namespace on first sync.
      - CreateNamespace=true
|
||||
|
|
@ -23,6 +23,16 @@ data:
|
|||
general:
|
||||
title: "Frigate Alert"
|
||||
|
||||
zones:
|
||||
unzoned: drop
|
||||
allow:
|
||||
- driveway_entrance
|
||||
|
||||
labels:
|
||||
allow:
|
||||
- person
|
||||
- car
|
||||
|
||||
ntfy:
|
||||
enabled: true
|
||||
server: http://ntfy.ntfy.svc.cluster.local:80
|
||||
|
|
|
|||
|
|
@ -56,17 +56,16 @@ data:
|
|||
track: [person, car, dog, cat, bird]
|
||||
|
||||
detectors:
|
||||
apple_silicon:
|
||||
type: zmq
|
||||
endpoint: tcp://host.minikube.internal:5555
|
||||
onnx:
|
||||
type: onnx
|
||||
|
||||
model:
|
||||
model_type: yolo-generic
|
||||
model_type: yolonas
|
||||
width: 320
|
||||
height: 320
|
||||
input_tensor: nchw
|
||||
input_dtype: float
|
||||
path: /media/frigate/models/yolov9m.onnx
|
||||
input_dtype: int
|
||||
path: /media/frigate/models/yolo_nas_s.onnx
|
||||
labelmap_path: /labelmap/coco-80.txt
|
||||
|
||||
record:
|
||||
|
|
|
|||
|
|
@ -17,6 +17,9 @@ spec:
|
|||
containers:
|
||||
- name: frigate-notify
|
||||
image: ghcr.io/0x2142/frigate-notify:v0.3.5
|
||||
env:
|
||||
- name: TZ
|
||||
value: America/Los_Angeles
|
||||
volumeMounts:
|
||||
- name: config
|
||||
mountPath: /app/config.yml
|
||||
|
|
|
|||
|
|
@ -6,6 +6,8 @@ metadata:
|
|||
namespace: frigate
|
||||
spec:
|
||||
replicas: 1
|
||||
strategy:
|
||||
type: Recreate
|
||||
selector:
|
||||
matchLabels:
|
||||
app: frigate
|
||||
|
|
@ -14,6 +16,7 @@ spec:
|
|||
labels:
|
||||
app: frigate
|
||||
spec:
|
||||
runtimeClassName: nvidia
|
||||
initContainers:
|
||||
- name: copy-config
|
||||
image: busybox:1.37
|
||||
|
|
@ -25,7 +28,7 @@ spec:
|
|||
mountPath: /config
|
||||
containers:
|
||||
- name: frigate
|
||||
image: ghcr.io/blakeblackshear/frigate:0.17.0-rc2-standard-arm64
|
||||
image: ghcr.io/blakeblackshear/frigate:0.17.0-rc2-tensorrt
|
||||
ports:
|
||||
- containerPort: 5000
|
||||
name: http
|
||||
|
|
@ -60,6 +63,7 @@ spec:
|
|||
limits:
|
||||
memory: "2Gi"
|
||||
cpu: "2000m"
|
||||
nvidia.com/gpu: "1"
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /api/version
|
||||
|
|
@ -87,4 +91,4 @@ spec:
|
|||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 256Mi
|
||||
sizeLimit: 512Mi
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
# NFS PersistentVolume for Frigate recordings
|
||||
# Requires: NFS share on sifaka at /volume1/frigate with NFS permissions for indri
|
||||
# Requires: NFS share on sifaka at /volume1/frigate with NFS permissions for ringtail
|
||||
#
|
||||
# To create on Synology:
|
||||
# 1. Control Panel > Shared Folder > Create
|
||||
# 2. Name: frigate, Location: Volume 1
|
||||
# 3. Control Panel > File Services > NFS > NFS Rules
|
||||
# 4. Add rule for "frigate" share: Hostname=indri, Privilege=Read/Write, Squash=No mapping
|
||||
# 4. Add rule for "frigate" share: Hostname=ringtail, Privilege=Read/Write, Squash=No mapping
|
||||
apiVersion: v1
|
||||
kind: PersistentVolume
|
||||
metadata:
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
# PersistentVolumeClaim for Frigate SQLite database
|
||||
# Uses minikube's default storage class for local provisioning
|
||||
# Uses k3s local-path storage class for local provisioning
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
|
|
|
|||
|
|
@ -135,6 +135,17 @@ config:
|
|||
# type: caddy
|
||||
# url: http://indri.tail8d86e.ts.net:2019
|
||||
|
||||
# Services on ringtail k3s (not autodiscovered — different cluster)
|
||||
- Infrastructure:
|
||||
- NVR:
|
||||
href: https://nvr.ops.eblu.me
|
||||
icon: frigate.png
|
||||
description: Network video recorder
|
||||
- Ntfy:
|
||||
href: https://ntfy.ops.eblu.me
|
||||
icon: ntfy.png
|
||||
description: Push notifications
|
||||
|
||||
# External bookmarks
|
||||
bookmarks:
|
||||
- Admin:
|
||||
|
|
|
|||
51
argocd/manifests/nvidia-device-plugin/daemonset.yaml
Normal file
51
argocd/manifests/nvidia-device-plugin/daemonset.yaml
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
---
# DaemonSet running the NVIDIA Kubernetes device plugin, which exposes
# `nvidia.com/gpu` as a node resource for GPU workloads.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: nvidia-device-plugin
  namespace: nvidia-device-plugin
  labels:
    app: nvidia-device-plugin
spec:
  selector:
    matchLabels:
      app: nvidia-device-plugin
  template:
    metadata:
      labels:
        app: nvidia-device-plugin
    spec:
      # Tolerate an nvidia.com/gpu:NoSchedule taint so the plugin can still
      # be scheduled onto nodes carrying it.
      tolerations:
        - key: nvidia.com/gpu
          operator: Exists
          effect: NoSchedule
      priorityClassName: system-node-critical
      containers:
        - name: nvidia-device-plugin
          image: nvcr.io/nvidia/k8s-device-plugin:v0.18.2
          args:
            - --device-id-strategy=index
          env:
            # Must match the mountPath of the `nvidia-libs` volume below so
            # the driver libraries mounted from the host are found.
            - name: LD_LIBRARY_PATH
              value: /run/nvidia/lib
          securityContext:
            # NOTE(review): presumably required so the plugin can reach the
            # host GPU device nodes — confirm before tightening.
            privileged: true
          volumeMounts:
            # Kubelet device-plugin directory (mounted read-write).
            - name: device-plugins
              mountPath: /var/lib/kubelet/device-plugins
            - name: cdi-specs
              mountPath: /var/run/cdi
              readOnly: true
            - name: nvidia-libs
              mountPath: /run/nvidia/lib
              readOnly: true
      volumes:
        - name: device-plugins
          hostPath:
            path: /var/lib/kubelet/device-plugins
        - name: cdi-specs
          hostPath:
            path: /var/run/cdi
        # Host path holding the NVIDIA driver libraries; mounted into the
        # container at /run/nvidia/lib (see LD_LIBRARY_PATH above).
        - name: nvidia-libs
          hostPath:
            path: /etc/nvidia-driver/lib
|
||||
6
argocd/manifests/nvidia-device-plugin/runtime-class.yaml
Normal file
6
argocd/manifests/nvidia-device-plugin/runtime-class.yaml
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
---
# RuntimeClass selected by pods via `runtimeClassName: nvidia`.
# `handler` is the name of the container runtime handler configured on the
# node; it must match the runtime name defined in the node's containerd
# configuration.
apiVersion: node.k8s.io/v1
kind: RuntimeClass
metadata:
  name: nvidia
handler: nvidia
|
||||
1
docs/changelog.d/feature-frigate-ringtail-gpu.infra.md
Normal file
1
docs/changelog.d/feature-frigate-ringtail-gpu.infra.md
Normal file
|
|
@ -0,0 +1 @@
|
|||
Port Frigate NVR to ringtail k3s with RTX 4080 GPU acceleration (TensorRT/ONNX), replacing the ZMQ-based Apple Silicon detector on indri.
|
||||
|
|
@ -35,6 +35,28 @@ in
|
|||
package = config.boot.kernelPackages.nvidiaPackages.stable;
|
||||
};
|
||||
|
||||
# NVIDIA container toolkit (CDI specs + runtime for containerd/k3s GPU pods)
|
||||
hardware.nvidia-container-toolkit.enable = true;
|
||||
|
||||
# Stable path to NVIDIA driver libraries for k3s device plugin pod mounts.
|
||||
# Avoids mounting all of /nix/store — only the driver derivation is needed.
|
||||
environment.etc."nvidia-driver/lib".source = "${config.hardware.nvidia.package}/lib";
|
||||
|
||||
# Stable-path wrapper for nvidia-container-runtime.cdi (the CDI-based OCI
|
||||
# runtime that injects GPU devices/libs from NixOS-generated CDI specs).
|
||||
# The wrapper adds runc to PATH since k3s doesn't ship a standalone runc binary.
|
||||
environment.etc."nvidia-container-runtime/nvidia-runtime-cdi-wrapper" = {
|
||||
mode = "0755";
|
||||
text = ''
|
||||
#!/bin/sh
|
||||
export PATH="${pkgs.runc}/bin:$PATH"
|
||||
exec ${pkgs.nvidia-container-toolkit.tools}/bin/nvidia-container-runtime.cdi "$@"
|
||||
'';
|
||||
};
|
||||
|
||||
# NFS client support (required for k3s to mount NFS PersistentVolumes)
|
||||
boot.supportedFilesystems = [ "nfs" ];
|
||||
|
||||
# Wayland / Sway
|
||||
programs.sway = {
|
||||
enable = true;
|
||||
|
|
@ -109,6 +131,19 @@ in
|
|||
"--write-kubeconfig-mode=644"
|
||||
"--tls-san=ringtail.tail8d86e.ts.net"
|
||||
];
|
||||
containerdConfigTemplate = ''
|
||||
{{ template "base" . }}
|
||||
|
||||
[plugins.'io.containerd.cri.v1.runtime']
|
||||
enable_cdi = true
|
||||
cdi_spec_dirs = ["/var/run/cdi", "/etc/cdi"]
|
||||
|
||||
[plugins.'io.containerd.cri.v1.runtime'.containerd.runtimes.nvidia]
|
||||
privileged_without_host_devices = false
|
||||
runtime_type = "io.containerd.runc.v2"
|
||||
[plugins.'io.containerd.cri.v1.runtime'.containerd.runtimes.nvidia.options]
|
||||
BinaryName = "/etc/nvidia-container-runtime/nvidia-runtime-cdi-wrapper"
|
||||
'';
|
||||
};
|
||||
|
||||
# K3s containerd registry mirrors (pull through Zot on indri)
|
||||
|
|
|
|||
|
|
@ -51,6 +51,13 @@ services:
|
|||
upstream-source: https://github.com/gethomepage/homepage/releases
|
||||
notes: Deployed via Helm chart
|
||||
|
||||
- name: nvidia-device-plugin
|
||||
type: argocd
|
||||
last-reviewed: 2026-02-19
|
||||
current-version: "v0.18.2"
|
||||
upstream-source: https://github.com/NVIDIA/k8s-device-plugin/releases
|
||||
notes: DaemonSet + RuntimeClass on ringtail for GPU workloads
|
||||
|
||||
- name: frigate
|
||||
type: argocd
|
||||
last-reviewed: 2026-02-17
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue