From 0d1c2eb81a1aa89b066c1971896b1831f366c3fc Mon Sep 17 00:00:00 2001
From: Erich Blume
Date: Mon, 2 Mar 2026 20:01:45 -0800
Subject: [PATCH 1/5] Deploy Ollama LLM server on ringtail with GPU time-slicing

Add Ollama as a new ArgoCD-managed service on ringtail's k3s cluster:
- Deployment with main ollama container and model-sync sidecar
- Declarative model list (qwen2.5:14b, deepseek-r1:14b, phi4:14b, gemma3:12b)
- hostPath PV on /mnt/storage1/ollama for fast local model storage
- Tailscale ingress at ollama.ops.eblu.me
- Enable GPU time-slicing (replicas: 2) on nvidia-device-plugin so Frigate
  and Ollama can share the RTX 4080

Co-Authored-By: Claude Opus 4.6
---
Notes (review): sync-models.sh now decides whether a model is present with
an exact whole-line match (grep -qxF; a bare name is also tried as
"<name>:latest") instead of a substring match, so "llama3" is no longer
treated as present merely because "llama3.1:8b" is installed. The here-string
also avoids a spurious SIGPIPE failure of `echo | grep -q` under pipefail.
The `index` blob ids for sync-models.sh predate this change.

 argocd/apps/ollama.yaml                       | 18 ++++
 .../nvidia-device-plugin/daemonset.yaml       |  7 ++
 .../nvidia-device-plugin/kustomization.yaml   |  1 +
 .../time-slicing-config.yaml                  | 14 ++++
 argocd/manifests/ollama/deployment.yaml       | 84 +++++++++++++++++++
 .../manifests/ollama/ingress-tailscale.yaml   | 26 ++++++
 argocd/manifests/ollama/kustomization.yaml    | 22 +++++
 argocd/manifests/ollama/models.txt            |  6 ++
 argocd/manifests/ollama/pv-hostpath.yaml      | 15 ++++
 argocd/manifests/ollama/pvc.yaml              | 14 ++++
 argocd/manifests/ollama/service.yaml          | 13 +++
 argocd/manifests/ollama/sync-models.sh        | 59 +++++++++++++
 .../feature-ollama-ringtail.feature.md        |  1 +
 service-versions.yaml                         |  7 ++
 14 files changed, 287 insertions(+)
 create mode 100644 argocd/apps/ollama.yaml
 create mode 100644 argocd/manifests/nvidia-device-plugin/time-slicing-config.yaml
 create mode 100644 argocd/manifests/ollama/deployment.yaml
 create mode 100644 argocd/manifests/ollama/ingress-tailscale.yaml
 create mode 100644 argocd/manifests/ollama/kustomization.yaml
 create mode 100644 argocd/manifests/ollama/models.txt
 create mode 100644 argocd/manifests/ollama/pv-hostpath.yaml
 create mode 100644 argocd/manifests/ollama/pvc.yaml
 create mode 100644 argocd/manifests/ollama/service.yaml
 create mode 100644 argocd/manifests/ollama/sync-models.sh
 create mode 100644 docs/changelog.d/feature-ollama-ringtail.feature.md

diff --git a/argocd/apps/ollama.yaml b/argocd/apps/ollama.yaml
new file mode 100644
index 0000000..bb7a6a9
--- /dev/null
+++ b/argocd/apps/ollama.yaml
@@ -0,0 +1,18 @@
+---
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: ollama
+  namespace: argocd
+spec:
+  project: default
+  source:
+    repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git
+    targetRevision: main
+    path: argocd/manifests/ollama
+  destination:
+    server: https://ringtail.tail8d86e.ts.net:6443
+    namespace: ollama
+  syncPolicy:
+    syncOptions:
+      - CreateNamespace=true
diff --git a/argocd/manifests/nvidia-device-plugin/daemonset.yaml b/argocd/manifests/nvidia-device-plugin/daemonset.yaml
index 4c57a76..385ec8e 100644
--- a/argocd/manifests/nvidia-device-plugin/daemonset.yaml
+++ b/argocd/manifests/nvidia-device-plugin/daemonset.yaml
@@ -25,6 +25,7 @@ spec:
           image: nvcr.io/nvidia/k8s-device-plugin
           args:
             - --device-id-strategy=index
+            - --config=/config/config.yaml
           env:
             - name: LD_LIBRARY_PATH
               value: /run/nvidia/lib
@@ -39,6 +40,9 @@ spec:
             - name: nvidia-libs
               mountPath: /run/nvidia/lib
               readOnly: true
+            - name: plugin-config
+              mountPath: /config
+              readOnly: true
       volumes:
         - name: device-plugins
           hostPath:
@@ -49,3 +53,6 @@ spec:
         - name: nvidia-libs
           hostPath:
             path: /etc/nvidia-driver/lib
+        - name: plugin-config
+          configMap:
+            name: nvidia-device-plugin-config
diff --git a/argocd/manifests/nvidia-device-plugin/kustomization.yaml b/argocd/manifests/nvidia-device-plugin/kustomization.yaml
index 4ffe2d9..102127f 100644
--- a/argocd/manifests/nvidia-device-plugin/kustomization.yaml
+++ b/argocd/manifests/nvidia-device-plugin/kustomization.yaml
@@ -6,6 +6,7 @@ namespace: nvidia-device-plugin
 resources:
   - daemonset.yaml
   - runtime-class.yaml
+  - time-slicing-config.yaml
 
 images:
   - name: nvcr.io/nvidia/k8s-device-plugin
diff --git a/argocd/manifests/nvidia-device-plugin/time-slicing-config.yaml b/argocd/manifests/nvidia-device-plugin/time-slicing-config.yaml
new file mode 100644
index 0000000..dee2fd7
--- /dev/null
+++ b/argocd/manifests/nvidia-device-plugin/time-slicing-config.yaml
@@ -0,0 +1,14 @@
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: nvidia-device-plugin-config
+  namespace: nvidia-device-plugin
+data:
+  config.yaml: |
+    version: v1
+    sharing:
+      timeSlicing:
+        resources:
+          - name: nvidia.com/gpu
+            replicas: 2
diff --git a/argocd/manifests/ollama/deployment.yaml b/argocd/manifests/ollama/deployment.yaml
new file mode 100644
index 0000000..3c13e14
--- /dev/null
+++ b/argocd/manifests/ollama/deployment.yaml
@@ -0,0 +1,84 @@
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: ollama
+  namespace: ollama
+spec:
+  replicas: 1
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      app: ollama
+  template:
+    metadata:
+      labels:
+        app: ollama
+    spec:
+      runtimeClassName: nvidia
+      containers:
+        - name: ollama
+          image: ollama/ollama
+          ports:
+            - containerPort: 11434
+              name: http
+          env:
+            - name: OLLAMA_MODELS
+              value: /models
+            - name: OLLAMA_HOST
+              value: "0.0.0.0:11434"
+          volumeMounts:
+            - name: models
+              mountPath: /models
+          resources:
+            requests:
+              memory: "512Mi"
+              cpu: "500m"
+            limits:
+              memory: "16Gi"
+              cpu: "4000m"
+              nvidia.com/gpu: "1"
+          livenessProbe:
+            httpGet:
+              path: /api/tags
+              port: 11434
+            initialDelaySeconds: 30
+            periodSeconds: 30
+          readinessProbe:
+            httpGet:
+              path: /api/tags
+              port: 11434
+            initialDelaySeconds: 10
+            periodSeconds: 10
+        - name: model-sync
+          image: ollama/ollama
+          command: ["/bin/bash", "/scripts/sync-models.sh"]
+          env:
+            - name: MODEL_LIST
+              value: /config/models.txt
+            - name: OLLAMA_HOST
+              value: "http://localhost:11434"
+          volumeMounts:
+            - name: models-config
+              mountPath: /config
+            - name: sync-script
+              mountPath: /scripts
+          resources:
+            requests:
+              memory: "64Mi"
+              cpu: "50m"
+            limits:
+              memory: "256Mi"
+              cpu: "200m"
+      volumes:
+        - name: models
+          persistentVolumeClaim:
+            claimName: ollama-models
+        - name: models-config
+          configMap:
+            name: ollama-models
+        - name: sync-script
+          configMap:
+            name: ollama-sync-script
+            defaultMode: 493
diff --git a/argocd/manifests/ollama/ingress-tailscale.yaml b/argocd/manifests/ollama/ingress-tailscale.yaml
new file mode 100644
index 0000000..bada466
--- /dev/null
+++ b/argocd/manifests/ollama/ingress-tailscale.yaml
@@ -0,0 +1,26 @@
+---
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: ollama-tailscale
+  namespace: ollama
+  annotations:
+    tailscale.com/proxy-class: "default"
+    tailscale.com/proxy-group: "ingress"
+    gethomepage.dev/enabled: "true"
+    gethomepage.dev/name: "Ollama"
+    gethomepage.dev/group: "AI"
+    gethomepage.dev/icon: "ollama.png"
+    gethomepage.dev/description: "LLM inference server"
+    gethomepage.dev/href: "https://ollama.ops.eblu.me"
+    gethomepage.dev/pod-selector: "app=ollama"
+spec:
+  ingressClassName: tailscale
+  defaultBackend:
+    service:
+      name: ollama
+      port:
+        number: 11434
+  tls:
+    - hosts:
+        - ollama
diff --git a/argocd/manifests/ollama/kustomization.yaml b/argocd/manifests/ollama/kustomization.yaml
new file mode 100644
index 0000000..75add74
--- /dev/null
+++ b/argocd/manifests/ollama/kustomization.yaml
@@ -0,0 +1,22 @@
+---
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+namespace: ollama
+resources:
+  - pv-hostpath.yaml
+  - pvc.yaml
+  - deployment.yaml
+  - service.yaml
+  - ingress-tailscale.yaml
+
+images:
+  - name: ollama/ollama
+    newTag: "0.17.5"
+
+configMapGenerator:
+  - name: ollama-models
+    files:
+      - models.txt
+  - name: ollama-sync-script
+    files:
+      - sync-models.sh
diff --git a/argocd/manifests/ollama/models.txt b/argocd/manifests/ollama/models.txt
new file mode 100644
index 0000000..a998019
--- /dev/null
+++ b/argocd/manifests/ollama/models.txt
@@ -0,0 +1,6 @@
+# Models to pull from Ollama registry
+# One model per line. Comments with #.
+qwen2.5:14b
+deepseek-r1:14b
+phi4:14b
+gemma3:12b
diff --git a/argocd/manifests/ollama/pv-hostpath.yaml b/argocd/manifests/ollama/pv-hostpath.yaml
new file mode 100644
index 0000000..d25dbcc
--- /dev/null
+++ b/argocd/manifests/ollama/pv-hostpath.yaml
@@ -0,0 +1,15 @@
+---
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  name: ollama-models-pv
+spec:
+  capacity:
+    storage: 200Gi
+  accessModes:
+    - ReadWriteOnce
+  persistentVolumeReclaimPolicy: Retain
+  storageClassName: ""
+  hostPath:
+    path: /mnt/storage1/ollama
+    type: DirectoryOrCreate
diff --git a/argocd/manifests/ollama/pvc.yaml b/argocd/manifests/ollama/pvc.yaml
new file mode 100644
index 0000000..76c79a8
--- /dev/null
+++ b/argocd/manifests/ollama/pvc.yaml
@@ -0,0 +1,14 @@
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: ollama-models
+  namespace: ollama
+spec:
+  accessModes:
+    - ReadWriteOnce
+  storageClassName: ""
+  volumeName: ollama-models-pv
+  resources:
+    requests:
+      storage: 200Gi
diff --git a/argocd/manifests/ollama/service.yaml b/argocd/manifests/ollama/service.yaml
new file mode 100644
index 0000000..d9680e1
--- /dev/null
+++ b/argocd/manifests/ollama/service.yaml
@@ -0,0 +1,13 @@
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: ollama
+  namespace: ollama
+spec:
+  selector:
+    app: ollama
+  ports:
+    - name: http
+      port: 11434
+      targetPort: 11434
diff --git a/argocd/manifests/ollama/sync-models.sh b/argocd/manifests/ollama/sync-models.sh
new file mode 100644
index 0000000..8f1921f
--- /dev/null
+++ b/argocd/manifests/ollama/sync-models.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+# Sync models from ConfigMap to Ollama server
+# Runs as a sidecar in the ollama deployment
+set -euo pipefail
+
+MODEL_LIST="${MODEL_LIST:-/config/models.txt}"
+OLLAMA_HOST="${OLLAMA_HOST:-http://localhost:11434}"
+SYNC_INTERVAL="${SYNC_INTERVAL:-1800}"
+
+echo "Syncing models from ${MODEL_LIST} to ollama at ${OLLAMA_HOST}"
+
+while true; do
+    # Wait for ollama server to be ready
+    echo "Waiting for Ollama API..."
+    max_attempts=60
+    attempt=0
+    until curl -sf "${OLLAMA_HOST}/api/tags" > /dev/null 2>&1; do
+        attempt=$((attempt + 1))
+        if [[ $attempt -ge $max_attempts ]]; then
+            echo "Ollama not ready after ${max_attempts} attempts, will retry next cycle"
+            sleep "$SYNC_INTERVAL"
+            continue 2
+        fi
+        sleep 5
+    done
+    echo "Ollama is ready"
+
+    # Get list of currently pulled models
+    current=$(curl -sf "${OLLAMA_HOST}/api/tags" | grep -o '"name":"[^"]*"' | cut -d'"' -f4 || true)
+
+    pulled=0
+    skipped=0
+
+    while IFS= read -r model || [[ -n "$model" ]]; do
+        # Skip empty lines and comments
+        [[ -z "$model" || "$model" =~ ^[[:space:]]*# ]] && continue
+        # Trim whitespace
+        model=$(echo "$model" | xargs)
+        [[ -z "$model" ]] && continue
+
+        # Check if model is already pulled (exact name match; bare name = :latest)
+        if grep -qxF -e "$model" -e "${model}:latest" <<< "$current"; then
+            echo "Already present: $model"
+            ((skipped++)) || true
+        else
+            echo "Pulling: $model"
+            if curl -sf "${OLLAMA_HOST}/api/pull" -d "{\"name\":\"$model\"}" > /dev/null; then
+                echo "Pulled: $model"
+                ((pulled++)) || true
+            else
+                echo "Warning: Failed to pull $model" >&2
+            fi
+        fi
+    done < "$MODEL_LIST"
+
+    echo "Sync complete: $pulled pulled, $skipped already present"
+    echo "Next sync in ${SYNC_INTERVAL}s"
+    sleep "$SYNC_INTERVAL"
+done
diff --git a/docs/changelog.d/feature-ollama-ringtail.feature.md b/docs/changelog.d/feature-ollama-ringtail.feature.md
new file mode 100644
index 0000000..648757e
--- /dev/null
+++ b/docs/changelog.d/feature-ollama-ringtail.feature.md
@@ -0,0 +1 @@
+Deploy Ollama LLM server on ringtail with GPU acceleration and declarative model management
diff --git a/service-versions.yaml b/service-versions.yaml
index c1c48e1..00e1084 100644
--- a/service-versions.yaml
+++ b/service-versions.yaml
@@ -135,6 +135,13 @@ services:
     current-version: "2026.2.0"
     upstream-source: https://github.com/goauthentik/authentik/releases
 
+  - name: ollama
+    type: argocd
+    last-reviewed: "2026-03-02"
+    current-version: "0.17.5"
+    upstream-source: https://github.com/ollama/ollama/releases
+    notes: LLM inference server on ringtail (GPU); upstream container image
+
   - name: navidrome
     type: argocd
     last-reviewed: 2026-03-02
-- 
2.50.1 (Apple Git-155)

From 9cb235ab8e54319e8612df5e1794639b5f8ea276 Mon Sep 17 00:00:00 2001
From: Erich Blume
Date: Mon, 2 Mar 2026 20:05:27 -0800
Subject: [PATCH 2/5] Use octal 0755 for defaultMode with yamllint inline disable

Co-Authored-By: Claude Opus 4.6
---
 argocd/manifests/ollama/deployment.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/argocd/manifests/ollama/deployment.yaml b/argocd/manifests/ollama/deployment.yaml
index 3c13e14..2b68e55 100644
--- a/argocd/manifests/ollama/deployment.yaml
+++ b/argocd/manifests/ollama/deployment.yaml
@@ -81,4 +81,4 @@ spec:
         - name: sync-script
           configMap:
             name: ollama-sync-script
-            defaultMode: 493
+            defaultMode: 0755  # yamllint disable-line rule:octal-values
-- 
2.50.1 (Apple Git-155)

From 07376cc970b723ff3fb79e04ef4b014d4d3a30cc Mon Sep 17 00:00:00 2001
From: Erich Blume
Date: Mon, 2 Mar 2026 20:10:19 -0800
Subject: [PATCH 3/5] Fix nvidia-device-plugin config flag: --config-file not --config

Co-Authored-By: Claude Opus 4.6
---
 argocd/manifests/nvidia-device-plugin/daemonset.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/argocd/manifests/nvidia-device-plugin/daemonset.yaml b/argocd/manifests/nvidia-device-plugin/daemonset.yaml
index 385ec8e..b484959 100644
--- a/argocd/manifests/nvidia-device-plugin/daemonset.yaml
+++ b/argocd/manifests/nvidia-device-plugin/daemonset.yaml
@@ -25,7 +25,7 @@ spec:
           image: nvcr.io/nvidia/k8s-device-plugin
           args:
             - --device-id-strategy=index
-            - --config=/config/config.yaml
+            - --config-file=/config/config.yaml
           env:
             - name: LD_LIBRARY_PATH
               value: /run/nvidia/lib
-- 
2.50.1 (Apple Git-155)

From dd678e745481b1594bf3cb0578ee5773e7e19d61 Mon Sep 17 00:00:00 2001
From: Erich Blume
Date: Mon, 2 Mar 2026 20:20:19 -0800
Subject: [PATCH 4/5] Use ollama CLI instead of curl in sync script

The ollama/ollama container image doesn't include curl. Use `ollama list`
and `ollama pull` commands directly, which are always available.

Co-Authored-By: Claude Opus 4.6
---
Notes (review): the presence check is an exact whole-line match against the
first column of `ollama list` (name:tag); a bare model name is also tried as
"<name>:latest". The `index` blob ids below predate that change.

 argocd/manifests/ollama/sync-models.sh | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/argocd/manifests/ollama/sync-models.sh b/argocd/manifests/ollama/sync-models.sh
index 8f1921f..9430704 100644
--- a/argocd/manifests/ollama/sync-models.sh
+++ b/argocd/manifests/ollama/sync-models.sh
@@ -1,20 +1,22 @@
 #!/bin/bash
 # Sync models from ConfigMap to Ollama server
-# Runs as a sidecar in the ollama deployment
+# Runs as a sidecar in the ollama deployment, using the ollama CLI
 set -euo pipefail
 
 MODEL_LIST="${MODEL_LIST:-/config/models.txt}"
 OLLAMA_HOST="${OLLAMA_HOST:-http://localhost:11434}"
 SYNC_INTERVAL="${SYNC_INTERVAL:-1800}"
 
-echo "Syncing models from ${MODEL_LIST} to ollama at ${OLLAMA_HOST}"
+export OLLAMA_HOST
+
+echo "Syncing models from ${MODEL_LIST} via ollama CLI (host: ${OLLAMA_HOST})"
 
 while true; do
     # Wait for ollama server to be ready
     echo "Waiting for Ollama API..."
     max_attempts=60
     attempt=0
-    until curl -sf "${OLLAMA_HOST}/api/tags" > /dev/null 2>&1; do
+    until ollama list > /dev/null 2>&1; do
         attempt=$((attempt + 1))
         if [[ $attempt -ge $max_attempts ]]; then
             echo "Ollama not ready after ${max_attempts} attempts, will retry next cycle"
@@ -26,7 +28,7 @@ while true; do
     echo "Ollama is ready"
 
     # Get list of currently pulled models
-    current=$(curl -sf "${OLLAMA_HOST}/api/tags" | grep -o '"name":"[^"]*"' | cut -d'"' -f4 || true)
+    current=$(ollama list 2>/dev/null | tail -n +2 | awk '{print $1}' || true)
 
     pulled=0
     skipped=0
@@ -38,13 +40,13 @@ while true; do
         model=$(echo "$model" | xargs)
         [[ -z "$model" ]] && continue
 
-        # Check if model is already pulled (exact name match; bare name = :latest)
+        # Check if model is already pulled (exact match on name:tag from ollama list)
         if grep -qxF -e "$model" -e "${model}:latest" <<< "$current"; then
             echo "Already present: $model"
             ((skipped++)) || true
         else
             echo "Pulling: $model"
-            if curl -sf "${OLLAMA_HOST}/api/pull" -d "{\"name\":\"$model\"}" > /dev/null; then
+            if ollama pull "$model"; then
                 echo "Pulled: $model"
                 ((pulled++)) || true
             else
-- 
2.50.1 (Apple Git-155)

From bc5aa654919d6e46506ae4c1aae0f536c9c78381 Mon Sep 17 00:00:00 2001
From: Erich Blume
Date: Mon, 2 Mar 2026 20:37:12 -0800
Subject: [PATCH 5/5] Add ollama.ops.eblu.me to Caddy reverse proxy

Proxies to the Tailscale ingress at ollama.tail8d86e.ts.net.

Co-Authored-By: Claude Opus 4.6
---
 ansible/roles/caddy/defaults/main.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/ansible/roles/caddy/defaults/main.yml b/ansible/roles/caddy/defaults/main.yml
index b0fc046..464d331 100644
--- a/ansible/roles/caddy/defaults/main.yml
+++ b/ansible/roles/caddy/defaults/main.yml
@@ -85,6 +85,9 @@ caddy_services:
   - name: ntfy
     host: "ntfy.{{ caddy_domain }}"
     backend: "https://ntfy.tail8d86e.ts.net"
+  - name: ollama
+    host: "ollama.{{ caddy_domain }}"
+    backend: "https://ollama.tail8d86e.ts.net"
   - name: sifaka
     host: "nas.{{ caddy_domain }}"
     backend: "http://sifaka:5000"
-- 
2.50.1 (Apple Git-155)