diff --git a/argocd/apps/frigate.yaml b/argocd/apps/frigate.yaml index a90f412..c443774 100644 --- a/argocd/apps/frigate.yaml +++ b/argocd/apps/frigate.yaml @@ -11,7 +11,7 @@ spec: targetRevision: main path: argocd/manifests/frigate destination: - server: https://kubernetes.default.svc + server: https://ringtail.tail8d86e.ts.net:6443 namespace: frigate syncPolicy: syncOptions: diff --git a/argocd/apps/nvidia-device-plugin.yaml b/argocd/apps/nvidia-device-plugin.yaml new file mode 100644 index 0000000..af8395f --- /dev/null +++ b/argocd/apps/nvidia-device-plugin.yaml @@ -0,0 +1,18 @@ +--- +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: nvidia-device-plugin + namespace: argocd +spec: + project: default + source: + repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git + targetRevision: main + path: argocd/manifests/nvidia-device-plugin + destination: + server: https://ringtail.tail8d86e.ts.net:6443 + namespace: nvidia-device-plugin + syncPolicy: + syncOptions: + - CreateNamespace=true diff --git a/argocd/manifests/frigate/configmap-notify.yaml b/argocd/manifests/frigate/configmap-notify.yaml index ed357ad..890557a 100644 --- a/argocd/manifests/frigate/configmap-notify.yaml +++ b/argocd/manifests/frigate/configmap-notify.yaml @@ -23,6 +23,16 @@ data: general: title: "Frigate Alert" + zones: + unzoned: drop + allow: + - driveway_entrance + + labels: + allow: + - person + - car + ntfy: enabled: true server: http://ntfy.ntfy.svc.cluster.local:80 diff --git a/argocd/manifests/frigate/configmap.yaml b/argocd/manifests/frigate/configmap.yaml index 9c96008..8dd0aba 100644 --- a/argocd/manifests/frigate/configmap.yaml +++ b/argocd/manifests/frigate/configmap.yaml @@ -56,17 +56,16 @@ data: track: [person, car, dog, cat, bird] detectors: - apple_silicon: - type: zmq - endpoint: tcp://host.minikube.internal:5555 + onnx: + type: onnx model: - model_type: yolo-generic + model_type: yolonas width: 320 height: 320 input_tensor: nchw - input_dtype: float - path: /media/frigate/models/yolov9m.onnx + input_dtype: int + path: /media/frigate/models/yolo_nas_s.onnx labelmap_path: /labelmap/coco-80.txt record: diff --git a/argocd/manifests/frigate/deployment-notify.yaml b/argocd/manifests/frigate/deployment-notify.yaml index 6273d71..4083d4d 100644 --- a/argocd/manifests/frigate/deployment-notify.yaml +++ b/argocd/manifests/frigate/deployment-notify.yaml @@ -17,6 +17,9 @@ spec: containers: - name: frigate-notify image: ghcr.io/0x2142/frigate-notify:v0.3.5 + env: + - name: TZ + value: America/Los_Angeles volumeMounts: - name: config mountPath: /app/config.yml diff --git a/argocd/manifests/frigate/deployment.yaml b/argocd/manifests/frigate/deployment.yaml index afe11fb..1460bb3 100644 --- a/argocd/manifests/frigate/deployment.yaml +++ b/argocd/manifests/frigate/deployment.yaml @@ -6,6 +6,8 @@ metadata: namespace: frigate spec: replicas: 1 + strategy: + type: Recreate selector: matchLabels: app: frigate @@ -14,6 +16,7 @@ spec: labels: app: frigate spec: + runtimeClassName: nvidia initContainers: - name: copy-config image: busybox:1.37 @@ -25,7 +28,7 @@ spec: mountPath: /config containers: - name: frigate - image: ghcr.io/blakeblackshear/frigate:0.17.0-rc2-standard-arm64 + image: ghcr.io/blakeblackshear/frigate:0.17.0-rc2-tensorrt ports: - containerPort: 5000 name: http @@ -60,6 +63,7 @@ spec: limits: memory: "2Gi" cpu: "2000m" + nvidia.com/gpu: "1" livenessProbe: httpGet: path: /api/version @@ -87,4 +91,4 @@ spec: - name: shm emptyDir: medium: Memory - sizeLimit: 256Mi + sizeLimit: 512Mi diff --git a/argocd/manifests/frigate/pv-nfs.yaml b/argocd/manifests/frigate/pv-nfs.yaml index d3a592b..c7197ab 100644 --- a/argocd/manifests/frigate/pv-nfs.yaml +++ b/argocd/manifests/frigate/pv-nfs.yaml @@ -1,11 +1,11 @@ # NFS PersistentVolume for Frigate recordings -# Requires: NFS share on sifaka at /volume1/frigate with NFS permissions for indri +# Requires: NFS share on sifaka at /volume1/frigate with NFS permissions for ringtail # # To create on Synology: # 1. Control Panel > Shared Folder > Create # 2. Name: frigate, Location: Volume 1 # 3. Control Panel > File Services > NFS > NFS Rules -# 4. Add rule for "frigate" share: Hostname=indri, Privilege=Read/Write, Squash=No mapping +# 4. Add rule for "frigate" share: Hostname=ringtail, Privilege=Read/Write, Squash=No mapping apiVersion: v1 kind: PersistentVolume metadata: diff --git a/argocd/manifests/frigate/pvc-database.yaml b/argocd/manifests/frigate/pvc-database.yaml index 040bda3..1eacb1d 100644 --- a/argocd/manifests/frigate/pvc-database.yaml +++ b/argocd/manifests/frigate/pvc-database.yaml @@ -1,5 +1,5 @@ # PersistentVolumeClaim for Frigate SQLite database -# Uses minikube's default storage class for local provisioning +# Uses k3s local-path storage class for local provisioning apiVersion: v1 kind: PersistentVolumeClaim metadata: diff --git a/argocd/manifests/homepage/values.yaml b/argocd/manifests/homepage/values.yaml index 73d4252..151c46e 100644 --- a/argocd/manifests/homepage/values.yaml +++ b/argocd/manifests/homepage/values.yaml @@ -135,6 +135,17 @@ config: # type: caddy # url: http://indri.tail8d86e.ts.net:2019 + # Services on ringtail k3s (not autodiscovered — different cluster) + - Infrastructure: + - NVR: + href: https://nvr.ops.eblu.me + icon: frigate.png + description: Network video recorder + - Ntfy: + href: https://ntfy.ops.eblu.me + icon: ntfy.png + description: Push notifications + # External bookmarks bookmarks: - Admin: diff --git a/argocd/manifests/nvidia-device-plugin/daemonset.yaml b/argocd/manifests/nvidia-device-plugin/daemonset.yaml new file mode 100644 index 0000000..479d6e9 --- /dev/null +++ b/argocd/manifests/nvidia-device-plugin/daemonset.yaml @@ -0,0 +1,51 @@ +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: nvidia-device-plugin + namespace: nvidia-device-plugin + labels: + app: nvidia-device-plugin +spec: + selector: + matchLabels: + app: nvidia-device-plugin + template: + metadata: + labels: + app: nvidia-device-plugin + spec: + tolerations: + - key: nvidia.com/gpu + operator: Exists + effect: NoSchedule + priorityClassName: system-node-critical + containers: + - name: nvidia-device-plugin + image: nvcr.io/nvidia/k8s-device-plugin:v0.18.2 + args: + - --device-id-strategy=index + env: + - name: LD_LIBRARY_PATH + value: /run/nvidia/lib + securityContext: + privileged: true + volumeMounts: + - name: device-plugins + mountPath: /var/lib/kubelet/device-plugins + - name: cdi-specs + mountPath: /var/run/cdi + readOnly: true + - name: nvidia-libs + mountPath: /run/nvidia/lib + readOnly: true + volumes: + - name: device-plugins + hostPath: + path: /var/lib/kubelet/device-plugins + - name: cdi-specs + hostPath: + path: /var/run/cdi + - name: nvidia-libs + hostPath: + path: /etc/nvidia-driver/lib diff --git a/argocd/manifests/nvidia-device-plugin/runtime-class.yaml b/argocd/manifests/nvidia-device-plugin/runtime-class.yaml new file mode 100644 index 0000000..7ba6add --- /dev/null +++ b/argocd/manifests/nvidia-device-plugin/runtime-class.yaml @@ -0,0 +1,6 @@ +--- +apiVersion: node.k8s.io/v1 +kind: RuntimeClass +metadata: + name: nvidia +handler: nvidia diff --git a/docs/changelog.d/feature-frigate-ringtail-gpu.infra.md b/docs/changelog.d/feature-frigate-ringtail-gpu.infra.md new file mode 100644 index 0000000..d204a4f --- /dev/null +++ b/docs/changelog.d/feature-frigate-ringtail-gpu.infra.md @@ -0,0 +1 @@ +Port Frigate NVR to ringtail k3s with RTX 4080 GPU acceleration (TensorRT/ONNX), replacing the ZMQ-based Apple Silicon detector on indri. diff --git a/nixos/ringtail/configuration.nix b/nixos/ringtail/configuration.nix index 6cb0581..5a0035b 100644 --- a/nixos/ringtail/configuration.nix +++ b/nixos/ringtail/configuration.nix @@ -35,6 +35,28 @@ in package = config.boot.kernelPackages.nvidiaPackages.stable; }; + # NVIDIA container toolkit (CDI specs + runtime for containerd/k3s GPU pods) + hardware.nvidia-container-toolkit.enable = true; + + # Stable path to NVIDIA driver libraries for k3s device plugin pod mounts. + # Avoids mounting all of /nix/store — only the driver derivation is needed. + environment.etc."nvidia-driver/lib".source = "${config.hardware.nvidia.package}/lib"; + + # Stable-path wrapper for nvidia-container-runtime.cdi (the CDI-based OCI + # runtime that injects GPU devices/libs from NixOS-generated CDI specs). + # The wrapper adds runc to PATH since k3s doesn't ship a standalone runc binary. + environment.etc."nvidia-container-runtime/nvidia-runtime-cdi-wrapper" = { + mode = "0755"; + text = '' + #!/bin/sh + export PATH="${pkgs.runc}/bin:$PATH" + exec ${pkgs.nvidia-container-toolkit.tools}/bin/nvidia-container-runtime.cdi "$@" + ''; + }; + + # NFS client support (required for k3s to mount NFS PersistentVolumes) + boot.supportedFilesystems = [ "nfs" ]; + # Wayland / Sway programs.sway = { enable = true; @@ -109,6 +131,19 @@ in "--write-kubeconfig-mode=644" "--tls-san=ringtail.tail8d86e.ts.net" ]; + containerdConfigTemplate = '' + {{ template "base" . }} + + [plugins.'io.containerd.cri.v1.runtime'] + enable_cdi = true + cdi_spec_dirs = ["/var/run/cdi", "/etc/cdi"] + + [plugins.'io.containerd.cri.v1.runtime'.containerd.runtimes.nvidia] + privileged_without_host_devices = false + runtime_type = "io.containerd.runc.v2" + [plugins.'io.containerd.cri.v1.runtime'.containerd.runtimes.nvidia.options] + BinaryName = "/etc/nvidia-container-runtime/nvidia-runtime-cdi-wrapper" + ''; }; # K3s containerd registry mirrors (pull through Zot on indri) diff --git a/service-versions.yaml b/service-versions.yaml index dc102e9..1a435f5 100644 --- a/service-versions.yaml +++ b/service-versions.yaml @@ -51,6 +51,13 @@ services: upstream-source: https://github.com/gethomepage/homepage/releases notes: Deployed via Helm chart + - name: nvidia-device-plugin + type: argocd + last-reviewed: 2026-02-19 + current-version: "v0.18.2" + upstream-source: https://github.com/NVIDIA/k8s-device-plugin/releases + notes: DaemonSet + RuntimeClass on ringtail for GPU workloads + - name: frigate type: argocd last-reviewed: 2026-02-17