Deploy Ollama LLM server on ringtail (#277)
## Summary

- Deploy Ollama as a new ArgoCD-managed service on ringtail's k3s cluster with GPU acceleration
- Declarative model management via `models.txt` + sidecar sync script (mirrors kiwix torrent pattern)
- Initial models: `qwen2.5:14b`, `deepseek-r1:14b`, `phi4:14b`, `gemma3:12b`
- hostPath PV on `/mnt/storage1/ollama` for fast local model storage (200Gi)
- Tailscale ingress at `ollama.ops.eblu.me` for API access from tailnet
- Enable GPU time-slicing (`replicas: 2`) on nvidia-device-plugin so Frigate and Ollama share the RTX 4080

## Deployment and Testing

- [ ] Deploy nvidia-device-plugin changes first: `argocd app sync nvidia-device-plugin`
- [ ] Verify GPU time-slicing: `kubectl describe node ringtail --context=k3s-ringtail` shows `nvidia.com/gpu: 2`
- [ ] Sync `apps` app with `--revision feature/ollama-ringtail`
- [ ] Set ollama app to branch: `argocd app set ollama --revision feature/ollama-ringtail && argocd app sync ollama`
- [ ] Verify model-sync sidecar pulls models: `kubectl logs -n ollama deploy/ollama -c model-sync --context=k3s-ringtail`
- [ ] Test API: `curl https://ollama.ops.eblu.me/api/tags`
- [ ] Test inference: `curl https://ollama.ops.eblu.me/api/generate -d '{"model":"qwen2.5:14b","prompt":"Hello"}'`
- [ ] Verify Frigate still works after GPU sharing change
- [ ] After merge: `argocd app set ollama --revision main && argocd app sync ollama`

Reviewed-on: https://forge.ops.eblu.me/eblume/blumeops/pulls/277
This commit is contained in:
parent
0f79c61c42
commit
31d925814f
15 changed files with 292 additions and 0 deletions
|
|
@ -85,6 +85,9 @@ caddy_services:
|
||||||
- name: ntfy
|
- name: ntfy
|
||||||
host: "ntfy.{{ caddy_domain }}"
|
host: "ntfy.{{ caddy_domain }}"
|
||||||
backend: "https://ntfy.tail8d86e.ts.net"
|
backend: "https://ntfy.tail8d86e.ts.net"
|
||||||
|
- name: ollama
|
||||||
|
host: "ollama.{{ caddy_domain }}"
|
||||||
|
backend: "https://ollama.tail8d86e.ts.net"
|
||||||
- name: sifaka
|
- name: sifaka
|
||||||
host: "nas.{{ caddy_domain }}"
|
host: "nas.{{ caddy_domain }}"
|
||||||
backend: "http://sifaka:5000"
|
backend: "http://sifaka:5000"
|
||||||
|
|
|
||||||
18
argocd/apps/ollama.yaml
Normal file
18
argocd/apps/ollama.yaml
Normal file
|
|
@ -0,0 +1,18 @@
|
||||||
|
---
|
||||||
|
apiVersion: argoproj.io/v1alpha1
|
||||||
|
kind: Application
|
||||||
|
metadata:
|
||||||
|
name: ollama
|
||||||
|
namespace: argocd
|
||||||
|
spec:
|
||||||
|
project: default
|
||||||
|
source:
|
||||||
|
repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git
|
||||||
|
targetRevision: main
|
||||||
|
path: argocd/manifests/ollama
|
||||||
|
destination:
|
||||||
|
server: https://ringtail.tail8d86e.ts.net:6443
|
||||||
|
namespace: ollama
|
||||||
|
syncPolicy:
|
||||||
|
syncOptions:
|
||||||
|
- CreateNamespace=true
|
||||||
|
|
@ -25,6 +25,7 @@ spec:
|
||||||
image: nvcr.io/nvidia/k8s-device-plugin
|
image: nvcr.io/nvidia/k8s-device-plugin
|
||||||
args:
|
args:
|
||||||
- --device-id-strategy=index
|
- --device-id-strategy=index
|
||||||
|
- --config-file=/config/config.yaml
|
||||||
env:
|
env:
|
||||||
- name: LD_LIBRARY_PATH
|
- name: LD_LIBRARY_PATH
|
||||||
value: /run/nvidia/lib
|
value: /run/nvidia/lib
|
||||||
|
|
@ -39,6 +40,9 @@ spec:
|
||||||
- name: nvidia-libs
|
- name: nvidia-libs
|
||||||
mountPath: /run/nvidia/lib
|
mountPath: /run/nvidia/lib
|
||||||
readOnly: true
|
readOnly: true
|
||||||
|
- name: plugin-config
|
||||||
|
mountPath: /config
|
||||||
|
readOnly: true
|
||||||
volumes:
|
volumes:
|
||||||
- name: device-plugins
|
- name: device-plugins
|
||||||
hostPath:
|
hostPath:
|
||||||
|
|
@ -49,3 +53,6 @@ spec:
|
||||||
- name: nvidia-libs
|
- name: nvidia-libs
|
||||||
hostPath:
|
hostPath:
|
||||||
path: /etc/nvidia-driver/lib
|
path: /etc/nvidia-driver/lib
|
||||||
|
- name: plugin-config
|
||||||
|
configMap:
|
||||||
|
name: nvidia-device-plugin-config
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ namespace: nvidia-device-plugin
|
||||||
resources:
|
resources:
|
||||||
- daemonset.yaml
|
- daemonset.yaml
|
||||||
- runtime-class.yaml
|
- runtime-class.yaml
|
||||||
|
- time-slicing-config.yaml
|
||||||
|
|
||||||
images:
|
images:
|
||||||
- name: nvcr.io/nvidia/k8s-device-plugin
|
- name: nvcr.io/nvidia/k8s-device-plugin
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,14 @@
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: nvidia-device-plugin-config
|
||||||
|
namespace: nvidia-device-plugin
|
||||||
|
data:
|
||||||
|
config.yaml: |
|
||||||
|
version: v1
|
||||||
|
sharing:
|
||||||
|
timeSlicing:
|
||||||
|
resources:
|
||||||
|
- name: nvidia.com/gpu
|
||||||
|
replicas: 2
|
||||||
84
argocd/manifests/ollama/deployment.yaml
Normal file
84
argocd/manifests/ollama/deployment.yaml
Normal file
|
|
@ -0,0 +1,84 @@
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: ollama
|
||||||
|
namespace: ollama
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
strategy:
|
||||||
|
type: Recreate
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: ollama
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: ollama
|
||||||
|
spec:
|
||||||
|
runtimeClassName: nvidia
|
||||||
|
containers:
|
||||||
|
- name: ollama
|
||||||
|
image: ollama/ollama
|
||||||
|
ports:
|
||||||
|
- containerPort: 11434
|
||||||
|
name: http
|
||||||
|
env:
|
||||||
|
- name: OLLAMA_MODELS
|
||||||
|
value: /models
|
||||||
|
- name: OLLAMA_HOST
|
||||||
|
value: "0.0.0.0:11434"
|
||||||
|
volumeMounts:
|
||||||
|
- name: models
|
||||||
|
mountPath: /models
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
memory: "512Mi"
|
||||||
|
cpu: "500m"
|
||||||
|
limits:
|
||||||
|
memory: "16Gi"
|
||||||
|
cpu: "4000m"
|
||||||
|
nvidia.com/gpu: "1"
|
||||||
|
livenessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /api/tags
|
||||||
|
port: 11434
|
||||||
|
initialDelaySeconds: 30
|
||||||
|
periodSeconds: 30
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /api/tags
|
||||||
|
port: 11434
|
||||||
|
initialDelaySeconds: 10
|
||||||
|
periodSeconds: 10
|
||||||
|
- name: model-sync
|
||||||
|
image: ollama/ollama
|
||||||
|
command: ["/bin/bash", "/scripts/sync-models.sh"]
|
||||||
|
env:
|
||||||
|
- name: MODEL_LIST
|
||||||
|
value: /config/models.txt
|
||||||
|
- name: OLLAMA_HOST
|
||||||
|
value: "http://localhost:11434"
|
||||||
|
volumeMounts:
|
||||||
|
- name: models-config
|
||||||
|
mountPath: /config
|
||||||
|
- name: sync-script
|
||||||
|
mountPath: /scripts
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
memory: "64Mi"
|
||||||
|
cpu: "50m"
|
||||||
|
limits:
|
||||||
|
memory: "256Mi"
|
||||||
|
cpu: "200m"
|
||||||
|
volumes:
|
||||||
|
- name: models
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: ollama-models
|
||||||
|
- name: models-config
|
||||||
|
configMap:
|
||||||
|
name: ollama-models
|
||||||
|
- name: sync-script
|
||||||
|
configMap:
|
||||||
|
name: ollama-sync-script
|
||||||
|
defaultMode: 0755 # yamllint disable-line rule:octal-values
|
||||||
26
argocd/manifests/ollama/ingress-tailscale.yaml
Normal file
26
argocd/manifests/ollama/ingress-tailscale.yaml
Normal file
|
|
@ -0,0 +1,26 @@
|
||||||
|
---
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: Ingress
|
||||||
|
metadata:
|
||||||
|
name: ollama-tailscale
|
||||||
|
namespace: ollama
|
||||||
|
annotations:
|
||||||
|
tailscale.com/proxy-class: "default"
|
||||||
|
tailscale.com/proxy-group: "ingress"
|
||||||
|
gethomepage.dev/enabled: "true"
|
||||||
|
gethomepage.dev/name: "Ollama"
|
||||||
|
gethomepage.dev/group: "AI"
|
||||||
|
gethomepage.dev/icon: "ollama.png"
|
||||||
|
gethomepage.dev/description: "LLM inference server"
|
||||||
|
gethomepage.dev/href: "https://ollama.ops.eblu.me"
|
||||||
|
gethomepage.dev/pod-selector: "app=ollama"
|
||||||
|
spec:
|
||||||
|
ingressClassName: tailscale
|
||||||
|
defaultBackend:
|
||||||
|
service:
|
||||||
|
name: ollama
|
||||||
|
port:
|
||||||
|
number: 11434
|
||||||
|
tls:
|
||||||
|
- hosts:
|
||||||
|
- ollama
|
||||||
22
argocd/manifests/ollama/kustomization.yaml
Normal file
22
argocd/manifests/ollama/kustomization.yaml
Normal file
|
|
@ -0,0 +1,22 @@
|
||||||
|
---
|
||||||
|
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||||
|
kind: Kustomization
|
||||||
|
namespace: ollama
|
||||||
|
resources:
|
||||||
|
- pv-hostpath.yaml
|
||||||
|
- pvc.yaml
|
||||||
|
- deployment.yaml
|
||||||
|
- service.yaml
|
||||||
|
- ingress-tailscale.yaml
|
||||||
|
|
||||||
|
images:
|
||||||
|
- name: ollama/ollama
|
||||||
|
newTag: "0.17.5"
|
||||||
|
|
||||||
|
configMapGenerator:
|
||||||
|
- name: ollama-models
|
||||||
|
files:
|
||||||
|
- models.txt
|
||||||
|
- name: ollama-sync-script
|
||||||
|
files:
|
||||||
|
- sync-models.sh
|
||||||
6
argocd/manifests/ollama/models.txt
Normal file
6
argocd/manifests/ollama/models.txt
Normal file
|
|
@ -0,0 +1,6 @@
|
||||||
|
# Models to pull from Ollama registry
|
||||||
|
# One model per line. Comments with #.
|
||||||
|
qwen2.5:14b
|
||||||
|
deepseek-r1:14b
|
||||||
|
phi4:14b
|
||||||
|
gemma3:12b
|
||||||
15
argocd/manifests/ollama/pv-hostpath.yaml
Normal file
15
argocd/manifests/ollama/pv-hostpath.yaml
Normal file
|
|
@ -0,0 +1,15 @@
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolume
|
||||||
|
metadata:
|
||||||
|
name: ollama-models-pv
|
||||||
|
spec:
|
||||||
|
capacity:
|
||||||
|
storage: 200Gi
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteOnce
|
||||||
|
persistentVolumeReclaimPolicy: Retain
|
||||||
|
storageClassName: ""
|
||||||
|
hostPath:
|
||||||
|
path: /mnt/storage1/ollama
|
||||||
|
type: DirectoryOrCreate
|
||||||
14
argocd/manifests/ollama/pvc.yaml
Normal file
14
argocd/manifests/ollama/pvc.yaml
Normal file
|
|
@ -0,0 +1,14 @@
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: ollama-models
|
||||||
|
namespace: ollama
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteOnce
|
||||||
|
storageClassName: ""
|
||||||
|
volumeName: ollama-models-pv
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 200Gi
|
||||||
13
argocd/manifests/ollama/service.yaml
Normal file
13
argocd/manifests/ollama/service.yaml
Normal file
|
|
@ -0,0 +1,13 @@
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: ollama
|
||||||
|
namespace: ollama
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
app: ollama
|
||||||
|
ports:
|
||||||
|
- name: http
|
||||||
|
port: 11434
|
||||||
|
targetPort: 11434
|
||||||
61
argocd/manifests/ollama/sync-models.sh
Normal file
61
argocd/manifests/ollama/sync-models.sh
Normal file
|
|
@ -0,0 +1,61 @@
|
||||||
|
#!/bin/bash
# Sync models from ConfigMap to Ollama server
# Runs as a sidecar in the ollama deployment, using the ollama CLI.
#
# Environment (all optional):
#   MODEL_LIST     path to the model list; one model per line, "#" starts a comment
#   OLLAMA_HOST    address of the Ollama API the CLI talks to
#   SYNC_INTERVAL  seconds to sleep between sync cycles
set -euo pipefail

MODEL_LIST="${MODEL_LIST:-/config/models.txt}"
OLLAMA_HOST="${OLLAMA_HOST:-http://localhost:11434}"
SYNC_INTERVAL="${SYNC_INTERVAL:-1800}"

export OLLAMA_HOST

echo "Syncing models from ${MODEL_LIST} via ollama CLI (host: ${OLLAMA_HOST})"

while true; do
  # Wait for ollama server to be ready (up to max_attempts polls, 5s apart)
  echo "Waiting for Ollama API..."
  max_attempts=60
  attempt=0
  until ollama list > /dev/null 2>&1; do
    attempt=$((attempt + 1))
    if [[ $attempt -ge $max_attempts ]]; then
      echo "Ollama not ready after ${max_attempts} attempts, will retry next cycle"
      sleep "$SYNC_INTERVAL"
      # Restart the outer sync loop, not just this readiness poll
      continue 2
    fi
    sleep 5
  done
  echo "Ollama is ready"

  # Names (first column) of the models already pulled; the header row is skipped.
  # "|| true" keeps a transient CLI failure from killing the sidecar under
  # "set -e -o pipefail"; an empty list just means every model gets pulled.
  current=$(ollama list 2>/dev/null | awk 'NR > 1 {print $1}' || true)

  pulled=0
  skipped=0

  while IFS= read -r model || [[ -n "$model" ]]; do
    # Drop comments (whole-line or trailing), then trim surrounding whitespace
    # using parameter expansion only (no quote/backslash interpretation).
    model="${model%%#*}"
    model="${model#"${model%%[![:space:]]*}"}"
    model="${model%"${model##*[![:space:]]}"}"
    if [[ -z "$model" ]]; then
      continue
    fi

    # ollama list shows name:tag, so compare against the fully tagged name.
    # An entry without a tag in its last path component is assumed to resolve
    # to ":latest" (Ollama's default tag).
    wanted="$model"
    if [[ "${model##*/}" != *:* ]]; then
      wanted="${model}:latest"
    fi

    # Exact whole-line match: a substring match would treat "phi4:14b" as
    # present when only "phi4:14b-q8_0" is installed. The here-string avoids
    # a pipeline whose early-exiting grep could trip pipefail. A miss here is
    # safe: re-pulling an already present model is idempotent.
    if grep -qxF -- "$wanted" <<< "$current"; then
      echo "Already present: $model"
      skipped=$((skipped + 1))
    else
      echo "Pulling: $model"
      # Detach stdin so the CLI can never consume the rest of the model list
      # that this loop is reading from.
      if ollama pull "$model" < /dev/null; then
        echo "Pulled: $model"
        pulled=$((pulled + 1))
      else
        # Best effort: keep going with the remaining models
        echo "Warning: Failed to pull $model" >&2
      fi
    fi
  done < "$MODEL_LIST"

  echo "Sync complete: $pulled pulled, $skipped already present"
  echo "Next sync in ${SYNC_INTERVAL}s"
  sleep "$SYNC_INTERVAL"
done
|
||||||
1
docs/changelog.d/feature-ollama-ringtail.feature.md
Normal file
1
docs/changelog.d/feature-ollama-ringtail.feature.md
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
Deploy Ollama LLM server on ringtail with GPU acceleration and declarative model management
|
||||||
|
|
@ -135,6 +135,13 @@ services:
|
||||||
current-version: "2026.2.0"
|
current-version: "2026.2.0"
|
||||||
upstream-source: https://github.com/goauthentik/authentik/releases
|
upstream-source: https://github.com/goauthentik/authentik/releases
|
||||||
|
|
||||||
|
- name: ollama
|
||||||
|
type: argocd
|
||||||
|
last-reviewed: "2026-03-02"
|
||||||
|
current-version: "0.17.5"
|
||||||
|
upstream-source: https://github.com/ollama/ollama/releases
|
||||||
|
notes: LLM inference server on ringtail (GPU); upstream container image
|
||||||
|
|
||||||
- name: navidrome
|
- name: navidrome
|
||||||
type: argocd
|
type: argocd
|
||||||
last-reviewed: 2026-03-02
|
last-reviewed: 2026-03-02
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue