From 31d925814fc2aa1ff5c19b7b0707c7234af5537f Mon Sep 17 00:00:00 2001
From: Erich Blume <blume.erich@gmail.com>
Date: Mon, 2 Mar 2026 20:39:51 -0800
Subject: [PATCH] Deploy Ollama LLM server on ringtail (#277)

## Summary
- Deploy Ollama as a new ArgoCD-managed service on ringtail's k3s cluster with GPU acceleration
- Declarative model management via `models.txt` + sidecar sync script (mirrors kiwix torrent pattern)
- Initial models: `qwen2.5:14b`, `deepseek-r1:14b`, `phi4:14b`, `gemma3:12b`
- hostPath PV on `/mnt/storage1/ollama` for fast local model storage (200Gi)
- Tailscale ingress at `ollama.ops.eblu.me` for API access from tailnet
- Enable GPU time-slicing (`replicas: 2`) on nvidia-device-plugin so Frigate and Ollama share the RTX 4080

## Deployment and Testing
- [ ] Deploy nvidia-device-plugin changes first: `argocd app sync nvidia-device-plugin`
- [ ] Verify GPU time-slicing: `kubectl describe node ringtail --context=k3s-ringtail` shows `nvidia.com/gpu: 2`
- [ ] Sync `apps` app with `--revision feature/ollama-ringtail`
- [ ] Set ollama app to branch: `argocd app set ollama --revision feature/ollama-ringtail && argocd app sync ollama`
- [ ] Verify model-sync sidecar pulls models: `kubectl logs -n ollama deploy/ollama -c model-sync --context=k3s-ringtail`
- [ ] Test API: `curl https://ollama.ops.eblu.me/api/tags`
- [ ] Test inference: `curl https://ollama.ops.eblu.me/api/generate -d '{"model":"qwen2.5:14b","prompt":"Hello"}'`
- [ ] Verify Frigate still works after GPU sharing change
- [ ] After merge: `argocd app set ollama --revision main && argocd app sync ollama`

Reviewed-on: https://forge.ops.eblu.me/eblume/blumeops/pulls/277
---
 ansible/roles/caddy/defaults/main.yml         |  3 +
 argocd/apps/ollama.yaml                       | 18 ++++
 .../nvidia-device-plugin/daemonset.yaml       |  7 ++
 .../nvidia-device-plugin/kustomization.yaml   |  1 +
 .../time-slicing-config.yaml                  | 14 ++++
 argocd/manifests/ollama/deployment.yaml       | 84 +++++++++++++++++++
 .../manifests/ollama/ingress-tailscale.yaml   | 26 ++++++
 argocd/manifests/ollama/kustomization.yaml    | 22 +++++
 argocd/manifests/ollama/models.txt            |  6 ++
 argocd/manifests/ollama/pv-hostpath.yaml      | 15 ++++
 argocd/manifests/ollama/pvc.yaml              | 14 ++++
 argocd/manifests/ollama/service.yaml          | 13 +++
 argocd/manifests/ollama/sync-models.sh        | 61 ++++++++++++++
 .../feature-ollama-ringtail.feature.md        |  1 +
 service-versions.yaml                         |  7 ++
 15 files changed, 292 insertions(+)
 create mode 100644 argocd/apps/ollama.yaml
 create mode 100644 argocd/manifests/nvidia-device-plugin/time-slicing-config.yaml
 create mode 100644 argocd/manifests/ollama/deployment.yaml
 create mode 100644 argocd/manifests/ollama/ingress-tailscale.yaml
 create mode 100644 argocd/manifests/ollama/kustomization.yaml
 create mode 100644 argocd/manifests/ollama/models.txt
 create mode 100644 argocd/manifests/ollama/pv-hostpath.yaml
 create mode 100644 argocd/manifests/ollama/pvc.yaml
 create mode 100644 argocd/manifests/ollama/service.yaml
 create mode 100644 argocd/manifests/ollama/sync-models.sh
 create mode 100644 docs/changelog.d/feature-ollama-ringtail.feature.md

diff --git a/ansible/roles/caddy/defaults/main.yml b/ansible/roles/caddy/defaults/main.yml
index b0fc046..464d331 100644
--- a/ansible/roles/caddy/defaults/main.yml
+++ b/ansible/roles/caddy/defaults/main.yml
@@ -85,6 +85,9 @@ caddy_services:
   - name: ntfy
     host: "ntfy.{{ caddy_domain }}"
     backend: "https://ntfy.tail8d86e.ts.net"
+  - name: ollama
+    host: "ollama.{{ caddy_domain }}"
+    backend: "https://ollama.tail8d86e.ts.net"
   - name: sifaka
     host: "nas.{{ caddy_domain }}"
     backend: "http://sifaka:5000"
diff --git a/argocd/apps/ollama.yaml b/argocd/apps/ollama.yaml
new file mode 100644
index 0000000..bb7a6a9
--- /dev/null
+++ b/argocd/apps/ollama.yaml
@@ -0,0 +1,18 @@
+---
+apiVersion: argoproj.io/v1alpha1
+kind: Application  # Argo CD app that deploys argocd/manifests/ollama
+metadata:
+  name: ollama
+  namespace: argocd
+spec:
+  project: default
+  source:
+    repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git
+    targetRevision: main
+    path: argocd/manifests/ollama  # kustomize directory for this app
+  destination:
+    server: https://ringtail.tail8d86e.ts.net:6443  # ringtail k3s API
+    namespace: ollama
+  syncPolicy:  # no `automated` block here, so syncs are triggered by hand
+    syncOptions:
+      - CreateNamespace=true  # create the ollama namespace if missing
diff --git a/argocd/manifests/nvidia-device-plugin/daemonset.yaml b/argocd/manifests/nvidia-device-plugin/daemonset.yaml
index 4c57a76..b484959 100644
--- a/argocd/manifests/nvidia-device-plugin/daemonset.yaml
+++ b/argocd/manifests/nvidia-device-plugin/daemonset.yaml
@@ -25,6 +25,7 @@ spec:
           image: nvcr.io/nvidia/k8s-device-plugin
           args:
             - --device-id-strategy=index
+            - --config-file=/config/config.yaml
           env:
             - name: LD_LIBRARY_PATH
               value: /run/nvidia/lib
@@ -39,6 +40,9 @@ spec:
             - name: nvidia-libs
               mountPath: /run/nvidia/lib
               readOnly: true
+            - name: plugin-config
+              mountPath: /config
+              readOnly: true
       volumes:
         - name: device-plugins
           hostPath:
@@ -49,3 +53,6 @@ spec:
         - name: nvidia-libs
           hostPath:
             path: /etc/nvidia-driver/lib
+        - name: plugin-config
+          configMap:
+            name: nvidia-device-plugin-config
diff --git a/argocd/manifests/nvidia-device-plugin/kustomization.yaml b/argocd/manifests/nvidia-device-plugin/kustomization.yaml
index 4ffe2d9..102127f 100644
--- a/argocd/manifests/nvidia-device-plugin/kustomization.yaml
+++ b/argocd/manifests/nvidia-device-plugin/kustomization.yaml
@@ -6,6 +6,7 @@ namespace: nvidia-device-plugin
 resources:
   - daemonset.yaml
   - runtime-class.yaml
+  - time-slicing-config.yaml
 
 images:
   - name: nvcr.io/nvidia/k8s-device-plugin
diff --git a/argocd/manifests/nvidia-device-plugin/time-slicing-config.yaml b/argocd/manifests/nvidia-device-plugin/time-slicing-config.yaml
new file mode 100644
index 0000000..dee2fd7
--- /dev/null
+++ b/argocd/manifests/nvidia-device-plugin/time-slicing-config.yaml
@@ -0,0 +1,14 @@
+---
+apiVersion: v1
+kind: ConfigMap  # device-plugin config enabling GPU time-slicing
+metadata:
+  name: nvidia-device-plugin-config  # mounted by daemonset.yaml at /config
+  namespace: nvidia-device-plugin
+data:  # replicas: 2 below advertises each GPU as two allocatable units
+  config.yaml: |  # read by the plugin via --config-file=/config/config.yaml
+    version: v1
+    sharing:
+      timeSlicing:
+        resources:
+          - name: nvidia.com/gpu
+            replicas: 2
diff --git a/argocd/manifests/ollama/deployment.yaml b/argocd/manifests/ollama/deployment.yaml
new file mode 100644
index 0000000..2b68e55
--- /dev/null
+++ b/argocd/manifests/ollama/deployment.yaml
@@ -0,0 +1,84 @@
+---
+apiVersion: apps/v1
+kind: Deployment  # Ollama server plus a model-sync sidecar in one pod
+metadata:
+  name: ollama
+  namespace: ollama
+spec:
+  replicas: 1
+  strategy:
+    type: Recreate  # old pod is terminated before its replacement starts
+  selector:
+    matchLabels:
+      app: ollama
+  template:
+    metadata:
+      labels:
+        app: ollama  # matched by the selector above and by service.yaml
+    spec:
+      runtimeClassName: nvidia  # RuntimeClass (see nvidia-device-plugin)
+      containers:
+        - name: ollama
+          image: ollama/ollama  # tag is pinned in kustomization.yaml
+          ports:
+            - containerPort: 11434
+              name: http
+          env:
+            - name: OLLAMA_MODELS
+              value: /models  # same path as the models volumeMount below
+            - name: OLLAMA_HOST
+              value: "0.0.0.0:11434"  # bind on all interfaces
+          volumeMounts:
+            - name: models
+              mountPath: /models
+          resources:
+            requests:
+              memory: "512Mi"
+              cpu: "500m"
+            limits:
+              memory: "16Gi"
+              cpu: "4000m"
+              nvidia.com/gpu: "1"  # one of the time-sliced GPU replicas
+          livenessProbe:
+            httpGet:
+              path: /api/tags
+              port: 11434
+            initialDelaySeconds: 30
+            periodSeconds: 30
+          readinessProbe:
+            httpGet:
+              path: /api/tags
+              port: 11434
+            initialDelaySeconds: 10
+            periodSeconds: 10
+        - name: model-sync  # sidecar: runs sync-models.sh against the server
+          image: ollama/ollama
+          command: ["/bin/bash", "/scripts/sync-models.sh"]
+          env:
+            - name: MODEL_LIST
+              value: /config/models.txt
+            - name: OLLAMA_HOST
+              value: "http://localhost:11434"  # server in the same pod
+          volumeMounts:
+            - name: models-config
+              mountPath: /config
+            - name: sync-script
+              mountPath: /scripts
+          resources:
+            requests:
+              memory: "64Mi"
+              cpu: "50m"
+            limits:
+              memory: "256Mi"
+              cpu: "200m"
+      volumes:
+        - name: models
+          persistentVolumeClaim:
+            claimName: ollama-models  # PVC from pvc.yaml
+        - name: models-config
+          configMap:
+            name: ollama-models  # generated from models.txt
+        - name: sync-script
+          configMap:
+            name: ollama-sync-script  # generated from sync-models.sh
+            defaultMode: 0755 # yamllint disable-line rule:octal-values
diff --git a/argocd/manifests/ollama/ingress-tailscale.yaml b/argocd/manifests/ollama/ingress-tailscale.yaml
new file mode 100644
index 0000000..bada466
--- /dev/null
+++ b/argocd/manifests/ollama/ingress-tailscale.yaml
@@ -0,0 +1,26 @@
+---
+apiVersion: networking.k8s.io/v1
+kind: Ingress  # exposes the ollama Service via the tailscale ingress class
+metadata:
+  name: ollama-tailscale
+  namespace: ollama
+  annotations:
+    tailscale.com/proxy-class: "default"
+    tailscale.com/proxy-group: "ingress"
+    gethomepage.dev/enabled: "true"  # presumably Homepage dashboard entry
+    gethomepage.dev/name: "Ollama"
+    gethomepage.dev/group: "AI"
+    gethomepage.dev/icon: "ollama.png"
+    gethomepage.dev/description: "LLM inference server"
+    gethomepage.dev/href: "https://ollama.ops.eblu.me"
+    gethomepage.dev/pod-selector: "app=ollama"
+spec:
+  ingressClassName: tailscale
+  defaultBackend:  # no path rules: every request goes to this backend
+    service:
+      name: ollama
+      port:
+        number: 11434  # Service port declared in service.yaml
+  tls:
+    - hosts:
+        - ollama  # NOTE(review): presumably the tailnet hostname; confirm
diff --git a/argocd/manifests/ollama/kustomization.yaml b/argocd/manifests/ollama/kustomization.yaml
new file mode 100644
index 0000000..75add74
--- /dev/null
+++ b/argocd/manifests/ollama/kustomization.yaml
@@ -0,0 +1,22 @@
+---
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+namespace: ollama  # namespace set on the resources listed below
+resources:
+  - pv-hostpath.yaml
+  - pvc.yaml
+  - deployment.yaml
+  - service.yaml
+  - ingress-tailscale.yaml
+
+images:
+  - name: ollama/ollama
+    newTag: "0.17.5"  # also recorded in service-versions.yaml
+
+configMapGenerator:  # deployment.yaml mounts these at /config and /scripts
+  - name: ollama-models
+    files:
+      - models.txt
+  - name: ollama-sync-script
+    files:
+      - sync-models.sh
diff --git a/argocd/manifests/ollama/models.txt b/argocd/manifests/ollama/models.txt
new file mode 100644
index 0000000..a998019
--- /dev/null
+++ b/argocd/manifests/ollama/models.txt
@@ -0,0 +1,6 @@
+# Models to pull from Ollama registry
+# One model per line. Comments with #.
+qwen2.5:14b
+deepseek-r1:14b
+phi4:14b
+gemma3:12b
diff --git a/argocd/manifests/ollama/pv-hostpath.yaml b/argocd/manifests/ollama/pv-hostpath.yaml
new file mode 100644
index 0000000..d25dbcc
--- /dev/null
+++ b/argocd/manifests/ollama/pv-hostpath.yaml
@@ -0,0 +1,15 @@
+---
+apiVersion: v1
+kind: PersistentVolume  # node-local model storage, claimed by pvc.yaml
+metadata:
+  name: ollama-models-pv
+spec:
+  capacity:
+    storage: 200Gi
+  accessModes:
+    - ReadWriteOnce
+  persistentVolumeReclaimPolicy: Retain  # PV is kept if the claim is deleted
+  storageClassName: ""  # empty class: static binding, no dynamic provisioner
+  hostPath:
+    path: /mnt/storage1/ollama
+    type: DirectoryOrCreate  # directory is created on the node if missing
diff --git a/argocd/manifests/ollama/pvc.yaml b/argocd/manifests/ollama/pvc.yaml
new file mode 100644
index 0000000..76c79a8
--- /dev/null
+++ b/argocd/manifests/ollama/pvc.yaml
@@ -0,0 +1,14 @@
+---
+apiVersion: v1
+kind: PersistentVolumeClaim  # mounted at /models by deployment.yaml
+metadata:
+  name: ollama-models
+  namespace: ollama
+spec:
+  accessModes:
+    - ReadWriteOnce
+  storageClassName: ""  # same empty storage class as the PV
+  volumeName: ollama-models-pv  # bind to the hostPath PV in pv-hostpath.yaml
+  resources:
+    requests:
+      storage: 200Gi  # equals the PV capacity
diff --git a/argocd/manifests/ollama/service.yaml b/argocd/manifests/ollama/service.yaml
new file mode 100644
index 0000000..d9680e1
--- /dev/null
+++ b/argocd/manifests/ollama/service.yaml
@@ -0,0 +1,13 @@
+---
+apiVersion: v1
+kind: Service  # in-cluster endpoint used as the ingress backend
+metadata:
+  name: ollama
+  namespace: ollama
+spec:
+  selector:
+    app: ollama  # pods labelled by deployment.yaml
+  ports:
+    - name: http
+      port: 11434
+      targetPort: 11434  # containerPort of the ollama container
diff --git a/argocd/manifests/ollama/sync-models.sh b/argocd/manifests/ollama/sync-models.sh
new file mode 100644
index 0000000..9430704
--- /dev/null
+++ b/argocd/manifests/ollama/sync-models.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+# Reconcile the models listed in $MODEL_LIST against the Ollama server.
+# Runs as a sidecar in the ollama deployment and drives the server via the CLI.
+set -euo pipefail
+
+MODEL_LIST="${MODEL_LIST:-/config/models.txt}"
+OLLAMA_HOST="${OLLAMA_HOST:-http://localhost:11434}"
+SYNC_INTERVAL="${SYNC_INTERVAL:-1800}"
+
+export OLLAMA_HOST
+
+echo "Syncing models from ${MODEL_LIST} via ollama CLI (host: ${OLLAMA_HOST})"
+
+while true; do
+    # Wait for the server: up to max_attempts probes, 5s apart
+    echo "Waiting for Ollama API..."
+    max_attempts=60
+    attempt=0
+    until ollama list > /dev/null 2>&1; do
+        attempt=$((attempt + 1))
+        if [[ $attempt -ge $max_attempts ]]; then
+            echo "Ollama not ready after ${max_attempts} attempts, will retry next cycle"
+            sleep "$SYNC_INTERVAL"
+            continue 2
+        fi
+        sleep 5
+    done
+    echo "Ollama is ready"
+
+    # Snapshot installed models: first column of `ollama list`, header dropped
+    current=$(ollama list 2>/dev/null | tail -n +2 | awk '{print $1}' || true)
+
+    pulled=0
+    skipped=0
+
+    while IFS= read -r model || [[ -n "$model" ]]; do
+        # Drop comments (full-line or trailing), then trim surrounding whitespace
+        model="${model%%#*}"
+        model=$(echo "$model" | xargs)
+        [[ -z "$model" ]] && continue
+
+        # Exact match on name:tag (untagged means :latest); no pipe, so no SIGPIPE
+        wanted="$model"; [[ "$wanted" == *:* ]] || wanted+=":latest"
+        if grep -qxF -- "$wanted" <<< "$current"; then
+            echo "Already present: $model"
+            ((skipped++)) || true
+        else
+            echo "Pulling: $model"
+            # Detach stdin so the pull can never consume the rest of the list
+            if ollama pull "$model" < /dev/null; then
+                echo "Pulled: $model"
+                ((pulled++)) || true
+            else
+                echo "Warning: Failed to pull $model" >&2
+            fi
+        fi
+    done < "$MODEL_LIST"
+    echo "Sync complete: $pulled pulled, $skipped already present"
+    echo "Next sync in ${SYNC_INTERVAL}s"
+    sleep "$SYNC_INTERVAL"
+done
diff --git a/docs/changelog.d/feature-ollama-ringtail.feature.md b/docs/changelog.d/feature-ollama-ringtail.feature.md
new file mode 100644
index 0000000..648757e
--- /dev/null
+++ b/docs/changelog.d/feature-ollama-ringtail.feature.md
@@ -0,0 +1 @@
+Deploy Ollama LLM server on ringtail with GPU acceleration and declarative model management
diff --git a/service-versions.yaml b/service-versions.yaml
index c1c48e1..00e1084 100644
--- a/service-versions.yaml
+++ b/service-versions.yaml
@@ -135,6 +135,13 @@ services:
     current-version: "2026.2.0"
     upstream-source: https://github.com/goauthentik/authentik/releases
 
+  - name: ollama
+    type: argocd
+    last-reviewed: "2026-03-02"
+    current-version: "0.17.5"
+    upstream-source: https://github.com/ollama/ollama/releases
+    notes: LLM inference server on ringtail (GPU); upstream container image
+
   - name: navidrome
     type: argocd
     last-reviewed: 2026-03-02