Deploy Ollama LLM server on ringtail (#277)
## Summary

- Deploy Ollama as a new ArgoCD-managed service on ringtail's k3s cluster with GPU acceleration
- Declarative model management via `models.txt` + sidecar sync script (mirrors kiwix torrent pattern)
- Initial models: `qwen2.5:14b`, `deepseek-r1:14b`, `phi4:14b`, `gemma3:12b`
- hostPath PV on `/mnt/storage1/ollama` for fast local model storage (200Gi)
- Tailscale ingress at `ollama.ops.eblu.me` for API access from tailnet
- Enable GPU time-slicing (`replicas: 2`) on nvidia-device-plugin so Frigate and Ollama share the RTX 4080

## Deployment and Testing

- [ ] Deploy nvidia-device-plugin changes first: `argocd app sync nvidia-device-plugin`
- [ ] Verify GPU time-slicing: `kubectl describe node ringtail --context=k3s-ringtail` shows `nvidia.com/gpu: 2`
- [ ] Sync `apps` app with `--revision feature/ollama-ringtail`
- [ ] Set ollama app to branch: `argocd app set ollama --revision feature/ollama-ringtail && argocd app sync ollama`
- [ ] Verify model-sync sidecar pulls models: `kubectl logs -n ollama deploy/ollama -c model-sync --context=k3s-ringtail`
- [ ] Test API: `curl https://ollama.ops.eblu.me/api/tags`
- [ ] Test inference: `curl https://ollama.ops.eblu.me/api/generate -d '{"model":"qwen2.5:14b","prompt":"Hello"}'`
- [ ] Verify Frigate still works after GPU sharing change
- [ ] After merge: `argocd app set ollama --revision main && argocd app sync ollama`

Reviewed-on: https://forge.ops.eblu.me/eblume/blumeops/pulls/277
This commit is contained in:
parent
0f79c61c42
commit
31d925814f
15 changed files with 292 additions and 0 deletions
|
|
@ -85,6 +85,9 @@ caddy_services:
|
|||
- name: ntfy
|
||||
host: "ntfy.{{ caddy_domain }}"
|
||||
backend: "https://ntfy.tail8d86e.ts.net"
|
||||
- name: ollama
|
||||
host: "ollama.{{ caddy_domain }}"
|
||||
backend: "https://ollama.tail8d86e.ts.net"
|
||||
- name: sifaka
|
||||
host: "nas.{{ caddy_domain }}"
|
||||
backend: "http://sifaka:5000"
|
||||
|
|
|
|||
18
argocd/apps/ollama.yaml
Normal file
18
argocd/apps/ollama.yaml
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
---
# ArgoCD Application for Ollama: renders argocd/manifests/ollama from the
# blumeops repository into the "ollama" namespace of the ringtail cluster.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  namespace: argocd
  name: ollama
spec:
  project: default
  destination:
    namespace: ollama
    server: https://ringtail.tail8d86e.ts.net:6443
  source:
    path: argocd/manifests/ollama
    targetRevision: main
    repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git
  syncPolicy:
    # No "automated" stanza is set here, so syncs are triggered explicitly.
    syncOptions:
      # Let ArgoCD create the destination namespace on first sync.
      - CreateNamespace=true
|
||||
|
|
@ -25,6 +25,7 @@ spec:
|
|||
image: nvcr.io/nvidia/k8s-device-plugin
|
||||
args:
|
||||
- --device-id-strategy=index
|
||||
- --config-file=/config/config.yaml
|
||||
env:
|
||||
- name: LD_LIBRARY_PATH
|
||||
value: /run/nvidia/lib
|
||||
|
|
@ -39,6 +40,9 @@ spec:
|
|||
- name: nvidia-libs
|
||||
mountPath: /run/nvidia/lib
|
||||
readOnly: true
|
||||
- name: plugin-config
|
||||
mountPath: /config
|
||||
readOnly: true
|
||||
volumes:
|
||||
- name: device-plugins
|
||||
hostPath:
|
||||
|
|
@ -49,3 +53,6 @@ spec:
|
|||
- name: nvidia-libs
|
||||
hostPath:
|
||||
path: /etc/nvidia-driver/lib
|
||||
- name: plugin-config
|
||||
configMap:
|
||||
name: nvidia-device-plugin-config
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ namespace: nvidia-device-plugin
|
|||
resources:
|
||||
- daemonset.yaml
|
||||
- runtime-class.yaml
|
||||
- time-slicing-config.yaml
|
||||
|
||||
images:
|
||||
- name: nvcr.io/nvidia/k8s-device-plugin
|
||||
|
|
|
|||
|
|
@ -0,0 +1,14 @@
|
|||
---
# Configuration file for the NVIDIA device plugin. The daemonset mounts this
# ConfigMap at /config and passes --config-file=/config/config.yaml.
apiVersion: v1
kind: ConfigMap
metadata:
  namespace: nvidia-device-plugin
  name: nvidia-device-plugin-config
data:
  # Time-slicing: advertise each physical GPU as 2 schedulable
  # nvidia.com/gpu resources so two pods can share one card.
  config.yaml: |
    version: v1
    sharing:
      timeSlicing:
        resources:
          - name: nvidia.com/gpu
            replicas: 2
|
||||
84
argocd/manifests/ollama/deployment.yaml
Normal file
84
argocd/manifests/ollama/deployment.yaml
Normal file
|
|
@ -0,0 +1,84 @@
|
|||
---
# Ollama inference server with a "model-sync" sidecar.
#   * ollama      serves the API on 11434 and stores models on the PVC.
#   * model-sync  runs sync-models.sh against the server over localhost,
#                 pulling every model listed in the models.txt ConfigMap.
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: ollama
  name: ollama
spec:
  replicas: 1
  # Recreate: the old pod is stopped before its replacement is started.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: ollama
  template:
    metadata:
      labels:
        app: ollama
    spec:
      runtimeClassName: nvidia
      volumes:
        # Model storage, bound through the ollama-models claim.
        - name: models
          persistentVolumeClaim:
            claimName: ollama-models
        # models.txt (generated by kustomize configMapGenerator).
        - name: models-config
          configMap:
            name: ollama-models
        # sync-models.sh (generated by kustomize configMapGenerator).
        - name: sync-script
          configMap:
            name: ollama-sync-script
            defaultMode: 0755  # yamllint disable-line rule:octal-values
      containers:
        # --- API server -------------------------------------------------
        - name: ollama
          image: ollama/ollama
          env:
            - name: OLLAMA_MODELS
              value: /models
            - name: OLLAMA_HOST
              value: '0.0.0.0:11434'
          ports:
            - name: http
              containerPort: 11434
          volumeMounts:
            - mountPath: /models
              name: models
          resources:
            requests:
              cpu: '500m'
              memory: '512Mi'
            limits:
              cpu: '4000m'
              memory: '16Gi'
              # One (time-sliced) GPU share from the device plugin.
              nvidia.com/gpu: '1'
          # Both probes hit the model-listing endpoint.
          readinessProbe:
            httpGet:
              port: 11434
              path: /api/tags
            initialDelaySeconds: 10
            periodSeconds: 10
          livenessProbe:
            httpGet:
              port: 11434
              path: /api/tags
            initialDelaySeconds: 30
            periodSeconds: 30
        # --- Model sync sidecar -----------------------------------------
        - name: model-sync
          image: ollama/ollama
          command:
            - /bin/bash
            - /scripts/sync-models.sh
          env:
            - name: MODEL_LIST
              value: /config/models.txt
            - name: OLLAMA_HOST
              value: 'http://localhost:11434'
          volumeMounts:
            - mountPath: /config
              name: models-config
            - mountPath: /scripts
              name: sync-script
          resources:
            requests:
              cpu: '50m'
              memory: '64Mi'
            limits:
              cpu: '200m'
              memory: '256Mi'
|
||||
26
argocd/manifests/ollama/ingress-tailscale.yaml
Normal file
26
argocd/manifests/ollama/ingress-tailscale.yaml
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
---
# Tailscale ingress for the Ollama API. All traffic goes to the "ollama"
# Service on port 11434; the TLS host entry is "ollama".
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  namespace: ollama
  name: ollama-tailscale
  annotations:
    # Tailscale operator settings.
    tailscale.com/proxy-group: 'ingress'
    tailscale.com/proxy-class: 'default'
    # Homepage dashboard entry.
    gethomepage.dev/enabled: 'true'
    gethomepage.dev/group: 'AI'
    gethomepage.dev/name: 'Ollama'
    gethomepage.dev/description: 'LLM inference server'
    gethomepage.dev/icon: 'ollama.png'
    gethomepage.dev/href: 'https://ollama.ops.eblu.me'
    gethomepage.dev/pod-selector: 'app=ollama'
spec:
  ingressClassName: tailscale
  tls:
    - hosts:
        - ollama
  defaultBackend:
    service:
      name: ollama
      port:
        number: 11434
|
||||
22
argocd/manifests/ollama/kustomization.yaml
Normal file
22
argocd/manifests/ollama/kustomization.yaml
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
---
# Kustomization for the Ollama service: storage, workload, networking,
# plus generated ConfigMaps for the model list and the sync script.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

namespace: ollama

resources:
  - pv-hostpath.yaml
  - pvc.yaml
  - deployment.yaml
  - service.yaml
  - ingress-tailscale.yaml

# Generated from the files that sit next to this kustomization.
configMapGenerator:
  - name: ollama-models
    files:
      - models.txt
  - name: ollama-sync-script
    files:
      - sync-models.sh

# Pins the tag for every container that uses the ollama/ollama image.
images:
  - name: ollama/ollama
    newTag: '0.17.5'
|
||||
6
argocd/manifests/ollama/models.txt
Normal file
6
argocd/manifests/ollama/models.txt
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
# Models to pull from Ollama registry
|
||||
# One model per line. Comments with #.
|
||||
qwen2.5:14b
|
||||
deepseek-r1:14b
|
||||
phi4:14b
|
||||
gemma3:12b
|
||||
15
argocd/manifests/ollama/pv-hostpath.yaml
Normal file
15
argocd/manifests/ollama/pv-hostpath.yaml
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
---
# Statically provisioned hostPath volume for Ollama model storage.
apiVersion: v1
kind: PersistentVolume
metadata:
  name: ollama-models-pv
spec:
  # Empty storage class: bound explicitly by the claim, not dynamically.
  storageClassName: ''
  # Retain: the volume is kept when the claim is deleted.
  persistentVolumeReclaimPolicy: Retain
  accessModes:
    - ReadWriteOnce
  capacity:
    storage: 200Gi
  hostPath:
    # DirectoryOrCreate: the directory is created if it does not exist yet.
    type: DirectoryOrCreate
    path: /mnt/storage1/ollama
|
||||
14
argocd/manifests/ollama/pvc.yaml
Normal file
14
argocd/manifests/ollama/pvc.yaml
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
---
# Claim for the Ollama model store, pinned to the ollama-models-pv volume.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  namespace: ollama
  name: ollama-models
spec:
  # Bind to the named volume directly; the empty storage class keeps any
  # default dynamic provisioner out of the picture.
  volumeName: ollama-models-pv
  storageClassName: ''
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 200Gi
|
||||
13
argocd/manifests/ollama/service.yaml
Normal file
13
argocd/manifests/ollama/service.yaml
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
---
# Service exposing the Ollama API (port 11434) of pods labelled app=ollama.
apiVersion: v1
kind: Service
metadata:
  namespace: ollama
  name: ollama
spec:
  ports:
    - name: http
      targetPort: 11434
      port: 11434
  selector:
    app: ollama
|
||||
61
argocd/manifests/ollama/sync-models.sh
Normal file
61
argocd/manifests/ollama/sync-models.sh
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
#!/bin/bash
# Sync models from ConfigMap to Ollama server
# Runs as a sidecar in the ollama deployment, using the ollama CLI
#
# Environment:
#   MODEL_LIST     Path to the model list: one model per line, blank lines
#                  and lines starting with '#' are ignored.
#   OLLAMA_HOST    Address of the Ollama API the CLI talks to.
#   SYNC_INTERVAL  Seconds to sleep between sync passes.
#
# The script never exits on its own: it loops forever, pulling any listed
# model that `ollama list` does not already report.
set -euo pipefail

MODEL_LIST="${MODEL_LIST:-/config/models.txt}"
OLLAMA_HOST="${OLLAMA_HOST:-http://localhost:11434}"
SYNC_INTERVAL="${SYNC_INTERVAL:-1800}"

export OLLAMA_HOST

echo "Syncing models from ${MODEL_LIST} via ollama CLI (host: ${OLLAMA_HOST})"

while true; do
    # Wait for ollama server to be ready
    echo "Waiting for Ollama API..."
    max_attempts=60
    attempt=0
    until ollama list > /dev/null 2>&1; do
        attempt=$((attempt + 1))
        if [[ $attempt -ge $max_attempts ]]; then
            echo "Ollama not ready after ${max_attempts} attempts, will retry next cycle"
            sleep "$SYNC_INTERVAL"
            # Restart the outer "while true" loop, not just this wait loop.
            continue 2
        fi
        sleep 5
    done
    echo "Ollama is ready"

    # Get list of currently pulled models (first column, header row dropped)
    current=$(ollama list 2>/dev/null | tail -n +2 | awk '{print $1}' || true)

    pulled=0
    skipped=0

    # "|| [[ -n ... ]]" keeps a final line that lacks a trailing newline.
    while IFS= read -r model || [[ -n "$model" ]]; do
        # Skip empty lines and comments
        [[ -z "$model" || "$model" =~ ^[[:space:]]*# ]] && continue
        # Trim whitespace
        model=$(echo "$model" | xargs)
        [[ -z "$model" ]] && continue

        # ollama list shows name:tag, so compare against the fully tagged
        # name. An entry without a tag in its last path segment refers to
        # the ":latest" tag.
        wanted="$model"
        if [[ "${wanted##*/}" != *:* ]]; then
            wanted="${wanted}:latest"
        fi

        # Whole-line fixed-string match (-x -F): a plain substring match
        # would treat "foo:14b" as present when only "foo:14b-instruct" is
        # installed. The here-string avoids an `echo | grep -q` pipeline,
        # which can fail spuriously under `pipefail` when grep exits early.
        if grep -qxF -- "$wanted" <<< "$current"; then
            echo "Already present: $model"
            skipped=$((skipped + 1))
        else
            echo "Pulling: $model"
            # stdin is redirected so the pull can never consume lines of
            # the model list that this loop is reading.
            if ollama pull "$model" < /dev/null; then
                echo "Pulled: $model"
                pulled=$((pulled + 1))
            else
                echo "Warning: Failed to pull $model" >&2
            fi
        fi
    done < "$MODEL_LIST"

    echo "Sync complete: $pulled pulled, $skipped already present"
    echo "Next sync in ${SYNC_INTERVAL}s"
    sleep "$SYNC_INTERVAL"
done
|
||||
1
docs/changelog.d/feature-ollama-ringtail.feature.md
Normal file
1
docs/changelog.d/feature-ollama-ringtail.feature.md
Normal file
|
|
@ -0,0 +1 @@
|
|||
Deploy Ollama LLM server on ringtail with GPU acceleration and declarative model management
|
||||
|
|
@ -135,6 +135,13 @@ services:
|
|||
current-version: "2026.2.0"
|
||||
upstream-source: https://github.com/goauthentik/authentik/releases
|
||||
|
||||
- name: ollama
|
||||
type: argocd
|
||||
last-reviewed: "2026-03-02"
|
||||
current-version: "0.17.5"
|
||||
upstream-source: https://github.com/ollama/ollama/releases
|
||||
notes: LLM inference server on ringtail (GPU); upstream container image
|
||||
|
||||
- name: navidrome
|
||||
type: argocd
|
||||
last-reviewed: 2026-03-02
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue