From 0d1c2eb81a1aa89b066c1971896b1831f366c3fc Mon Sep 17 00:00:00 2001
From: Erich Blume <blume.erich@gmail.com>
Date: Mon, 2 Mar 2026 20:01:45 -0800
Subject: [PATCH 1/5] Deploy Ollama LLM server on ringtail with GPU
 time-slicing

Add Ollama as a new ArgoCD-managed service on ringtail's k3s cluster:
- Deployment with main ollama container and model-sync sidecar
- Declarative model list (qwen2.5:14b, deepseek-r1:14b, phi4:14b, gemma3:12b)
- hostPath PV on /mnt/storage1/ollama for fast local model storage
- Tailscale ingress (ollama.tail8d86e.ts.net), linked as ollama.ops.eblu.me
- Enable GPU time-slicing (replicas: 2) on nvidia-device-plugin so
  Frigate and Ollama can share the RTX 4080

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 argocd/apps/ollama.yaml                       | 18 ++++
 .../nvidia-device-plugin/daemonset.yaml       |  7 ++
 .../nvidia-device-plugin/kustomization.yaml   |  1 +
 .../time-slicing-config.yaml                  | 14 ++++
 argocd/manifests/ollama/deployment.yaml       | 84 +++++++++++++++++++
 .../manifests/ollama/ingress-tailscale.yaml   | 26 ++++++
 argocd/manifests/ollama/kustomization.yaml    | 22 +++++
 argocd/manifests/ollama/models.txt            |  6 ++
 argocd/manifests/ollama/pv-hostpath.yaml      | 15 ++++
 argocd/manifests/ollama/pvc.yaml              | 14 ++++
 argocd/manifests/ollama/service.yaml          | 13 +++
 argocd/manifests/ollama/sync-models.sh        | 59 +++++++++++++
 .../feature-ollama-ringtail.feature.md        |  1 +
 service-versions.yaml                         |  7 ++
 14 files changed, 287 insertions(+)
 create mode 100644 argocd/apps/ollama.yaml
 create mode 100644 argocd/manifests/nvidia-device-plugin/time-slicing-config.yaml
 create mode 100644 argocd/manifests/ollama/deployment.yaml
 create mode 100644 argocd/manifests/ollama/ingress-tailscale.yaml
 create mode 100644 argocd/manifests/ollama/kustomization.yaml
 create mode 100644 argocd/manifests/ollama/models.txt
 create mode 100644 argocd/manifests/ollama/pv-hostpath.yaml
 create mode 100644 argocd/manifests/ollama/pvc.yaml
 create mode 100644 argocd/manifests/ollama/service.yaml
 create mode 100644 argocd/manifests/ollama/sync-models.sh
 create mode 100644 docs/changelog.d/feature-ollama-ringtail.feature.md

diff --git a/argocd/apps/ollama.yaml b/argocd/apps/ollama.yaml
new file mode 100644
index 0000000..bb7a6a9
--- /dev/null
+++ b/argocd/apps/ollama.yaml
@@ -0,0 +1,18 @@
+---
+apiVersion: argoproj.io/v1alpha1
+kind: Application  # Argo CD app for the manifests in argocd/manifests/ollama
+metadata:
+  name: ollama
+  namespace: argocd
+spec:
+  project: default
+  source:
+    repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git
+    targetRevision: main
+    path: argocd/manifests/ollama  # kustomize directory (kustomization.yaml)
+  destination:
+    server: https://ringtail.tail8d86e.ts.net:6443  # ringtail cluster API
+    namespace: ollama
+  syncPolicy:
+    syncOptions:
+      - CreateNamespace=true  # let Argo CD create the ollama namespace
diff --git a/argocd/manifests/nvidia-device-plugin/daemonset.yaml b/argocd/manifests/nvidia-device-plugin/daemonset.yaml
index 4c57a76..385ec8e 100644
--- a/argocd/manifests/nvidia-device-plugin/daemonset.yaml
+++ b/argocd/manifests/nvidia-device-plugin/daemonset.yaml
@@ -25,6 +25,7 @@ spec:
           image: nvcr.io/nvidia/k8s-device-plugin
           args:
             - --device-id-strategy=index
+            - --config=/config/config.yaml
           env:
             - name: LD_LIBRARY_PATH
               value: /run/nvidia/lib
@@ -39,6 +40,9 @@ spec:
             - name: nvidia-libs
               mountPath: /run/nvidia/lib
               readOnly: true
+            - name: plugin-config
+              mountPath: /config
+              readOnly: true
       volumes:
         - name: device-plugins
           hostPath:
@@ -49,3 +53,6 @@ spec:
         - name: nvidia-libs
           hostPath:
             path: /etc/nvidia-driver/lib
+        - name: plugin-config
+          configMap:
+            name: nvidia-device-plugin-config
diff --git a/argocd/manifests/nvidia-device-plugin/kustomization.yaml b/argocd/manifests/nvidia-device-plugin/kustomization.yaml
index 4ffe2d9..102127f 100644
--- a/argocd/manifests/nvidia-device-plugin/kustomization.yaml
+++ b/argocd/manifests/nvidia-device-plugin/kustomization.yaml
@@ -6,6 +6,7 @@ namespace: nvidia-device-plugin
 resources:
   - daemonset.yaml
   - runtime-class.yaml
+  - time-slicing-config.yaml
 
 images:
   - name: nvcr.io/nvidia/k8s-device-plugin
diff --git a/argocd/manifests/nvidia-device-plugin/time-slicing-config.yaml b/argocd/manifests/nvidia-device-plugin/time-slicing-config.yaml
new file mode 100644
index 0000000..dee2fd7
--- /dev/null
+++ b/argocd/manifests/nvidia-device-plugin/time-slicing-config.yaml
@@ -0,0 +1,14 @@
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: nvidia-device-plugin-config  # mounted at /config by daemonset.yaml
+  namespace: nvidia-device-plugin
+data:  # GPU time-slicing config; the daemonset passes it to the plugin
+  config.yaml: |
+    version: v1
+    sharing:
+      timeSlicing:
+        resources:
+          - name: nvidia.com/gpu
+            replicas: 2
diff --git a/argocd/manifests/ollama/deployment.yaml b/argocd/manifests/ollama/deployment.yaml
new file mode 100644
index 0000000..3c13e14
--- /dev/null
+++ b/argocd/manifests/ollama/deployment.yaml
@@ -0,0 +1,84 @@
+---
+apiVersion: apps/v1
+kind: Deployment  # Ollama server plus a model-sync sidecar in one pod
+metadata:
+  name: ollama
+  namespace: ollama
+spec:
+  replicas: 1
+  strategy:
+    type: Recreate  # stop the old pod before starting its replacement
+  selector:
+    matchLabels:
+      app: ollama
+  template:
+    metadata:
+      labels:
+        app: ollama  # matched by the Service selector in service.yaml
+    spec:
+      runtimeClassName: nvidia
+      containers:
+        - name: ollama
+          image: ollama/ollama  # tag is set by images: in kustomization.yaml
+          ports:
+            - containerPort: 11434
+              name: http
+          env:
+            - name: OLLAMA_MODELS
+              value: /models  # same path as the models volume mount below
+            - name: OLLAMA_HOST
+              value: "0.0.0.0:11434"
+          volumeMounts:
+            - name: models
+              mountPath: /models
+          resources:
+            requests:
+              memory: "512Mi"
+              cpu: "500m"
+            limits:
+              memory: "16Gi"
+              cpu: "4000m"
+              nvidia.com/gpu: "1"  # one of the time-sliced GPU replicas
+          livenessProbe:
+            httpGet:
+              path: /api/tags
+              port: 11434
+            initialDelaySeconds: 30
+            periodSeconds: 30
+          readinessProbe:
+            httpGet:
+              path: /api/tags
+              port: 11434
+            initialDelaySeconds: 10
+            periodSeconds: 10
+        - name: model-sync  # sidecar that runs sync-models.sh
+          image: ollama/ollama
+          command: ["/bin/bash", "/scripts/sync-models.sh"]
+          env:
+            - name: MODEL_LIST
+              value: /config/models.txt  # file from the models-config volume
+            - name: OLLAMA_HOST
+              value: "http://localhost:11434"  # the ollama container above
+          volumeMounts:
+            - name: models-config
+              mountPath: /config
+            - name: sync-script
+              mountPath: /scripts
+          resources:
+            requests:
+              memory: "64Mi"
+              cpu: "50m"
+            limits:
+              memory: "256Mi"
+              cpu: "200m"
+      volumes:
+        - name: models
+          persistentVolumeClaim:
+            claimName: ollama-models  # PVC from pvc.yaml
+        - name: models-config
+          configMap:
+            name: ollama-models  # generated from models.txt by kustomize
+        - name: sync-script
+          configMap:
+            name: ollama-sync-script
+            defaultMode: 493
diff --git a/argocd/manifests/ollama/ingress-tailscale.yaml b/argocd/manifests/ollama/ingress-tailscale.yaml
new file mode 100644
index 0000000..bada466
--- /dev/null
+++ b/argocd/manifests/ollama/ingress-tailscale.yaml
@@ -0,0 +1,26 @@
+---
+apiVersion: networking.k8s.io/v1
+kind: Ingress  # exposes the ollama Service via the tailscale ingress class
+metadata:
+  name: ollama-tailscale
+  namespace: ollama
+  annotations:
+    tailscale.com/proxy-class: "default"
+    tailscale.com/proxy-group: "ingress"
+    gethomepage.dev/enabled: "true"
+    gethomepage.dev/name: "Ollama"
+    gethomepage.dev/group: "AI"
+    gethomepage.dev/icon: "ollama.png"
+    gethomepage.dev/description: "LLM inference server"
+    gethomepage.dev/href: "https://ollama.ops.eblu.me"
+    gethomepage.dev/pod-selector: "app=ollama"
+spec:
+  ingressClassName: tailscale
+  defaultBackend:  # no rules: every request goes to this backend
+    service:
+      name: ollama
+      port:
+        number: 11434
+  tls:
+    - hosts:
+        - ollama  # tailnet hostname (ollama.tail8d86e.ts.net)
diff --git a/argocd/manifests/ollama/kustomization.yaml b/argocd/manifests/ollama/kustomization.yaml
new file mode 100644
index 0000000..75add74
--- /dev/null
+++ b/argocd/manifests/ollama/kustomization.yaml
@@ -0,0 +1,22 @@
+---
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+namespace: ollama
+resources:
+  - pv-hostpath.yaml
+  - pvc.yaml
+  - deployment.yaml
+  - service.yaml
+  - ingress-tailscale.yaml
+
+images:
+  - name: ollama/ollama
+    newTag: "0.17.5"  # matches current-version in service-versions.yaml
+
+configMapGenerator:  # ConfigMaps mounted by the model-sync container
+  - name: ollama-models
+    files:
+      - models.txt
+  - name: ollama-sync-script
+    files:
+      - sync-models.sh
diff --git a/argocd/manifests/ollama/models.txt b/argocd/manifests/ollama/models.txt
new file mode 100644
index 0000000..a998019
--- /dev/null
+++ b/argocd/manifests/ollama/models.txt
@@ -0,0 +1,6 @@
+# Models to pull from Ollama registry
+# One model per line. Comments with #.
+qwen2.5:14b
+deepseek-r1:14b
+phi4:14b
+gemma3:12b
diff --git a/argocd/manifests/ollama/pv-hostpath.yaml b/argocd/manifests/ollama/pv-hostpath.yaml
new file mode 100644
index 0000000..d25dbcc
--- /dev/null
+++ b/argocd/manifests/ollama/pv-hostpath.yaml
@@ -0,0 +1,15 @@
+---
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  name: ollama-models-pv  # referenced by volumeName in pvc.yaml
+spec:
+  capacity:
+    storage: 200Gi
+  accessModes:
+    - ReadWriteOnce
+  persistentVolumeReclaimPolicy: Retain
+  storageClassName: ""  # no storage class; same empty value as the PVC
+  hostPath:
+    path: /mnt/storage1/ollama  # directory on the node's local storage
+    type: DirectoryOrCreate  # created on the node if it does not exist
diff --git a/argocd/manifests/ollama/pvc.yaml b/argocd/manifests/ollama/pvc.yaml
new file mode 100644
index 0000000..76c79a8
--- /dev/null
+++ b/argocd/manifests/ollama/pvc.yaml
@@ -0,0 +1,14 @@
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: ollama-models  # claimName used by deployment.yaml
+  namespace: ollama
+spec:
+  accessModes:
+    - ReadWriteOnce
+  storageClassName: ""
+  volumeName: ollama-models-pv  # bind to the PV in pv-hostpath.yaml
+  resources:
+    requests:
+      storage: 200Gi
diff --git a/argocd/manifests/ollama/service.yaml b/argocd/manifests/ollama/service.yaml
new file mode 100644
index 0000000..d9680e1
--- /dev/null
+++ b/argocd/manifests/ollama/service.yaml
@@ -0,0 +1,13 @@
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: ollama
+  namespace: ollama
+spec:
+  selector:
+    app: ollama  # pod label set in deployment.yaml
+  ports:
+    - name: http
+      port: 11434  # backend port of the Tailscale ingress
+      targetPort: 11434
diff --git a/argocd/manifests/ollama/sync-models.sh b/argocd/manifests/ollama/sync-models.sh
new file mode 100644
index 0000000..8f1921f
--- /dev/null
+++ b/argocd/manifests/ollama/sync-models.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+# Sync models from ConfigMap to Ollama server
+# Runs as a sidecar in the ollama deployment
+set -euo pipefail
+
+MODEL_LIST="${MODEL_LIST:-/config/models.txt}"
+OLLAMA_HOST="${OLLAMA_HOST:-http://localhost:11434}"
+SYNC_INTERVAL="${SYNC_INTERVAL:-1800}"
+
+echo "Syncing models from ${MODEL_LIST} to ollama at ${OLLAMA_HOST}"
+
+while true; do
+    # Wait for ollama server to be ready
+    echo "Waiting for Ollama API..."
+    max_attempts=60
+    attempt=0
+    until curl -sf "${OLLAMA_HOST}/api/tags" > /dev/null 2>&1; do
+        attempt=$((attempt + 1))
+        if [[ $attempt -ge $max_attempts ]]; then
+            echo "Ollama not ready after ${max_attempts} attempts, will retry next cycle"
+            sleep "$SYNC_INTERVAL"
+            continue 2
+        fi
+        sleep 5
+    done
+    echo "Ollama is ready"
+
+    # Get list of currently pulled models
+    current=$(curl -sf "${OLLAMA_HOST}/api/tags" | grep -o '"name":"[^"]*"' | cut -d'"' -f4 || true)
+
+    pulled=0
+    skipped=0
+
+    while IFS= read -r model || [[ -n "$model" ]]; do
+        # Skip empty lines and comments
+        [[ -z "$model" || "$model" =~ ^[[:space:]]*# ]] && continue
+        # Trim whitespace
+        model=$(echo "$model" | xargs)
+        [[ -z "$model" ]] && continue
+
+        # Check if model is already pulled
+        if echo "$current" | grep -qF "$model"; then
+            echo "Already present: $model"
+            ((skipped++)) || true
+        else
+            echo "Pulling: $model"
+            if curl -sf "${OLLAMA_HOST}/api/pull" -d "{\"name\":\"$model\"}" > /dev/null; then
+                echo "Pulled: $model"
+                ((pulled++)) || true
+            else
+                echo "Warning: Failed to pull $model" >&2
+            fi
+        fi
+    done < "$MODEL_LIST"
+
+    echo "Sync complete: $pulled pulled, $skipped already present"
+    echo "Next sync in ${SYNC_INTERVAL}s"
+    sleep "$SYNC_INTERVAL"
+done
diff --git a/docs/changelog.d/feature-ollama-ringtail.feature.md b/docs/changelog.d/feature-ollama-ringtail.feature.md
new file mode 100644
index 0000000..648757e
--- /dev/null
+++ b/docs/changelog.d/feature-ollama-ringtail.feature.md
@@ -0,0 +1 @@
+Deploy Ollama LLM server on ringtail with GPU acceleration and declarative model management
diff --git a/service-versions.yaml b/service-versions.yaml
index c1c48e1..00e1084 100644
--- a/service-versions.yaml
+++ b/service-versions.yaml
@@ -135,6 +135,13 @@ services:
     current-version: "2026.2.0"
     upstream-source: https://github.com/goauthentik/authentik/releases
 
+  - name: ollama
+    type: argocd
+    last-reviewed: "2026-03-02"
+    current-version: "0.17.5"
+    upstream-source: https://github.com/ollama/ollama/releases
+    notes: LLM inference server on ringtail (GPU); upstream container image
+
   - name: navidrome
     type: argocd
     last-reviewed: 2026-03-02
-- 
2.50.1 (Apple Git-155)


From 9cb235ab8e54319e8612df5e1794639b5f8ea276 Mon Sep 17 00:00:00 2001
From: Erich Blume <blume.erich@gmail.com>
Date: Mon, 2 Mar 2026 20:05:27 -0800
Subject: [PATCH 2/5] Use octal 0755 for defaultMode with yamllint inline
 disable

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 argocd/manifests/ollama/deployment.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/argocd/manifests/ollama/deployment.yaml b/argocd/manifests/ollama/deployment.yaml
index 3c13e14..2b68e55 100644
--- a/argocd/manifests/ollama/deployment.yaml
+++ b/argocd/manifests/ollama/deployment.yaml
@@ -81,4 +81,4 @@ spec:
         - name: sync-script
           configMap:
             name: ollama-sync-script
-            defaultMode: 493
+            defaultMode: 0755 # yamllint disable-line rule:octal-values
-- 
2.50.1 (Apple Git-155)


From 07376cc970b723ff3fb79e04ef4b014d4d3a30cc Mon Sep 17 00:00:00 2001
From: Erich Blume <blume.erich@gmail.com>
Date: Mon, 2 Mar 2026 20:10:19 -0800
Subject: [PATCH 3/5] Fix nvidia-device-plugin config flag: --config-file not
 --config

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 argocd/manifests/nvidia-device-plugin/daemonset.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/argocd/manifests/nvidia-device-plugin/daemonset.yaml b/argocd/manifests/nvidia-device-plugin/daemonset.yaml
index 385ec8e..b484959 100644
--- a/argocd/manifests/nvidia-device-plugin/daemonset.yaml
+++ b/argocd/manifests/nvidia-device-plugin/daemonset.yaml
@@ -25,7 +25,7 @@ spec:
           image: nvcr.io/nvidia/k8s-device-plugin
           args:
             - --device-id-strategy=index
-            - --config=/config/config.yaml
+            - --config-file=/config/config.yaml
           env:
             - name: LD_LIBRARY_PATH
               value: /run/nvidia/lib
-- 
2.50.1 (Apple Git-155)


From dd678e745481b1594bf3cb0578ee5773e7e19d61 Mon Sep 17 00:00:00 2001
From: Erich Blume <blume.erich@gmail.com>
Date: Mon, 2 Mar 2026 20:20:19 -0800
Subject: [PATCH 4/5] Use ollama CLI instead of curl in sync script

The ollama/ollama container image doesn't include curl. Use `ollama list`
and `ollama pull` commands directly, which are always available.

Also compare each wanted model against `ollama list` by exact name:tag
(a bare name defaults to :latest) instead of by substring, so that e.g.
phi4:14b is not skipped just because phi4:14b-q8_0 is already present.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 argocd/manifests/ollama/sync-models.sh | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/argocd/manifests/ollama/sync-models.sh b/argocd/manifests/ollama/sync-models.sh
index 8f1921f..9430704 100644
--- a/argocd/manifests/ollama/sync-models.sh
+++ b/argocd/manifests/ollama/sync-models.sh
@@ -1,20 +1,22 @@
 #!/bin/bash
 # Sync models from ConfigMap to Ollama server
-# Runs as a sidecar in the ollama deployment
+# Runs as a sidecar in the ollama deployment, using the ollama CLI
 set -euo pipefail
 
 MODEL_LIST="${MODEL_LIST:-/config/models.txt}"
 OLLAMA_HOST="${OLLAMA_HOST:-http://localhost:11434}"
 SYNC_INTERVAL="${SYNC_INTERVAL:-1800}"
 
-echo "Syncing models from ${MODEL_LIST} to ollama at ${OLLAMA_HOST}"
+export OLLAMA_HOST
+
+echo "Syncing models from ${MODEL_LIST} via ollama CLI (host: ${OLLAMA_HOST})"
 
 while true; do
     # Wait for ollama server to be ready
     echo "Waiting for Ollama API..."
     max_attempts=60
     attempt=0
-    until curl -sf "${OLLAMA_HOST}/api/tags" > /dev/null 2>&1; do
+    until ollama list > /dev/null 2>&1; do
         attempt=$((attempt + 1))
         if [[ $attempt -ge $max_attempts ]]; then
             echo "Ollama not ready after ${max_attempts} attempts, will retry next cycle"
@@ -26,7 +28,7 @@ while true; do
     echo "Ollama is ready"
 
     # Get list of currently pulled models
-    current=$(curl -sf "${OLLAMA_HOST}/api/tags" | grep -o '"name":"[^"]*"' | cut -d'"' -f4 || true)
+    current=$(ollama list 2>/dev/null | tail -n +2 | awk '{print $1}' || true)
 
     pulled=0
     skipped=0
@@ -38,13 +40,17 @@ while true; do
         model=$(echo "$model" | xargs)
         [[ -z "$model" ]] && continue
 
-        # Check if model is already pulled
-        if echo "$current" | grep -qF "$model"; then
+        # Check if model is already pulled. `ollama list` prints name:tag, so
+        # default a bare name to :latest and compare whole lines; a substring
+        # match would report phi4:14b present when only phi4:14b-q8_0 exists.
+        want="$model"
+        [[ "$want" == *:* ]] || want="${want}:latest"
+        if grep -qxF -- "$want" <<< "$current"; then
             echo "Already present: $model"
             ((skipped++)) || true
         else
             echo "Pulling: $model"
-            if curl -sf "${OLLAMA_HOST}/api/pull" -d "{\"name\":\"$model\"}" > /dev/null; then
+            if ollama pull "$model"; then
                 echo "Pulled: $model"
                 ((pulled++)) || true
             else
-- 
2.50.1 (Apple Git-155)


From bc5aa654919d6e46506ae4c1aae0f536c9c78381 Mon Sep 17 00:00:00 2001
From: Erich Blume <blume.erich@gmail.com>
Date: Mon, 2 Mar 2026 20:37:12 -0800
Subject: [PATCH 5/5] Add ollama.ops.eblu.me to Caddy reverse proxy

Proxies to the Tailscale ingress at ollama.tail8d86e.ts.net.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 ansible/roles/caddy/defaults/main.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/ansible/roles/caddy/defaults/main.yml b/ansible/roles/caddy/defaults/main.yml
index b0fc046..464d331 100644
--- a/ansible/roles/caddy/defaults/main.yml
+++ b/ansible/roles/caddy/defaults/main.yml
@@ -85,6 +85,9 @@ caddy_services:
   - name: ntfy
     host: "ntfy.{{ caddy_domain }}"
     backend: "https://ntfy.tail8d86e.ts.net"
+  - name: ollama
+    host: "ollama.{{ caddy_domain }}"
+    backend: "https://ollama.tail8d86e.ts.net"
   - name: sifaka
     host: "nas.{{ caddy_domain }}"
     backend: "http://sifaka:5000"
-- 
2.50.1 (Apple Git-155)