From 831b82950a0637d83041e43b79b75b2cd632e355 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Fri, 27 Mar 2026 07:19:24 -0700 Subject: [PATCH] =?UTF-8?q?Upgrade=20nvidia-device-plugin=20v0.18.2=20?= =?UTF-8?q?=E2=86=92=20v0.19.0=20and=20add=20reference=20card?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 (1M context) --- .../nvidia-device-plugin/kustomization.yaml | 2 +- .../+nvidia-device-plugin-v0.19.0.infra.md | 1 + docs/reference/services/frigate.md | 1 + .../services/nvidia-device-plugin.md | 26 +++++++++++++++++++ service-versions.yaml | 4 +-- 5 files changed, 31 insertions(+), 3 deletions(-) create mode 100644 docs/changelog.d/+nvidia-device-plugin-v0.19.0.infra.md create mode 100644 docs/reference/services/nvidia-device-plugin.md diff --git a/argocd/manifests/nvidia-device-plugin/kustomization.yaml b/argocd/manifests/nvidia-device-plugin/kustomization.yaml index 102127f..a46edf6 100644 --- a/argocd/manifests/nvidia-device-plugin/kustomization.yaml +++ b/argocd/manifests/nvidia-device-plugin/kustomization.yaml @@ -10,4 +10,4 @@ resources: images: - name: nvcr.io/nvidia/k8s-device-plugin - newTag: v0.18.2 + newTag: v0.19.0 diff --git a/docs/changelog.d/+nvidia-device-plugin-v0.19.0.infra.md b/docs/changelog.d/+nvidia-device-plugin-v0.19.0.infra.md new file mode 100644 index 0000000..95abf25 --- /dev/null +++ b/docs/changelog.d/+nvidia-device-plugin-v0.19.0.infra.md @@ -0,0 +1 @@ +Upgrade nvidia-device-plugin from v0.18.2 to v0.19.0 diff --git a/docs/reference/services/frigate.md b/docs/reference/services/frigate.md index 46363bd..000d4ee 100644 --- a/docs/reference/services/frigate.md +++ b/docs/reference/services/frigate.md @@ -74,6 +74,7 @@ A separate **frigate-notify** pod polls Frigate's webapi every 15 seconds for de ## Related +- [[nvidia-device-plugin]] - GPU device plugin enabling CUDA access - [[ntfy]] - Push notification delivery - [[sifaka]] - NAS storage for recordings 
- [[observability]] - Prometheus metrics at `/api/metrics` diff --git a/docs/reference/services/nvidia-device-plugin.md b/docs/reference/services/nvidia-device-plugin.md new file mode 100644 index 0000000..7eb28d9 --- /dev/null +++ b/docs/reference/services/nvidia-device-plugin.md @@ -0,0 +1,26 @@ +--- +title: NVIDIA Device Plugin +modified: 2026-03-27 +tags: + - service + - gpu +--- + +# NVIDIA Device Plugin + +Kubernetes device plugin that exposes NVIDIA GPUs to pods on [[ringtail]]. Required for GPU workloads like [[frigate]] (object detection) and [[ollama]] (LLM inference). + +## Quick Reference + +| Property | Value | +|----------|-------| +| **Namespace** | `nvidia-device-plugin` | +| **Image** | `nvcr.io/nvidia/k8s-device-plugin` | +| **Upstream** | https://github.com/NVIDIA/k8s-device-plugin | +| **Manifests** | [argocd/manifests/nvidia-device-plugin/](https://forge.eblu.me/eblume/blumeops/src/branch/main/argocd/manifests/nvidia-device-plugin) | + +## Architecture + +Runs as a DaemonSet with a `privileged` security context, mounting the host's device-plugins socket, CDI specs, and NVIDIA driver libraries. A `RuntimeClass` named `nvidia` is defined for pods that need GPU access. + +Time-slicing is configured with 2 replicas per GPU, allowing two pods to share a single physical GPU. diff --git a/service-versions.yaml b/service-versions.yaml index ccc062e..6821488 100644 --- a/service-versions.yaml +++ b/service-versions.yaml @@ -46,8 +46,8 @@ services: - name: nvidia-device-plugin type: argocd - last-reviewed: 2026-02-19 - current-version: "v0.18.2" + last-reviewed: 2026-03-27 + current-version: "v0.19.0" upstream-source: https://github.com/NVIDIA/k8s-device-plugin/releases notes: DaemonSet + RuntimeClass on ringtail for GPU workloads