Switch to CDI for GPU device injection instead of nvidia-container-runtime

NixOS splits nvidia-container-toolkit into separate derivations, making
the nvidia-container-runtime binary path unreliable in containerd config.
CDI (Container Device Interface) is the modern approach:

- Enable CDI in k3s containerd config (enable_cdi = true; cdi_spec_dirs: /var/run/cdi, /etc/cdi)
- Device plugin uses CDI annotations (DEVICE_LIST_STRATEGY=cdi-annotations) to inject GPU devices
- Remove RuntimeClass (not needed with CDI)
- Remove runtimeClassName from Frigate deployment
- Mount the host's CDI specs (/var/run/cdi) read-only into the device plugin pod

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Erich Blume 2026-02-19 12:28:16 -08:00
commit 912dfcab10
4 changed files with 14 additions and 13 deletions

View file

@@ -23,7 +23,6 @@ spec:
mountPath: /config-ro
- name: config
mountPath: /config
runtimeClassName: nvidia
containers:
- name: frigate
image: ghcr.io/blakeblackshear/frigate:0.17.0-rc2-tensorrt

View file

@@ -19,11 +19,15 @@ spec:
- key: nvidia.com/gpu
operator: Exists
effect: NoSchedule
runtimeClassName: nvidia
priorityClassName: system-node-critical
containers:
- name: nvidia-device-plugin
image: nvcr.io/nvidia/k8s-device-plugin:v0.18.2
env:
- name: DEVICE_LIST_STRATEGY
value: cdi-annotations
- name: CDI_ROOT
value: /var/run/cdi
securityContext:
allowPrivilegeEscalation: false
capabilities:
@@ -31,7 +35,13 @@ spec:
volumeMounts:
- name: device-plugins
mountPath: /var/lib/kubelet/device-plugins
- name: cdi-specs
mountPath: /var/run/cdi
readOnly: true
volumes:
- name: device-plugins
hostPath:
path: /var/lib/kubelet/device-plugins
- name: cdi-specs
hostPath:
path: /var/run/cdi

View file

@@ -1,6 +0,0 @@
---
apiVersion: node.k8s.io/v1
kind: RuntimeClass
metadata:
name: nvidia
handler: nvidia

View file

@@ -118,11 +118,9 @@ in
containerdConfigTemplate = ''
{{ template "base" . }}
[plugins.'io.containerd.cri.v1.runtime'.containerd.runtimes.nvidia]
privileged_without_host_devices = false
runtime_type = "io.containerd.runc.v2"
[plugins.'io.containerd.cri.v1.runtime'.containerd.runtimes.nvidia.options]
BinaryName = "${pkgs.nvidia-container-toolkit}/bin/nvidia-container-runtime"
[plugins.'io.containerd.cri.v1.runtime']
enable_cdi = true
cdi_spec_dirs = ["/var/run/cdi", "/etc/cdi"]
'';
};