diff --git a/ansible/roles/caddy/defaults/main.yml b/ansible/roles/caddy/defaults/main.yml
index b0fc046..464d331 100644
--- a/ansible/roles/caddy/defaults/main.yml
+++ b/ansible/roles/caddy/defaults/main.yml
@@ -85,6 +85,9 @@ caddy_services:
   - name: ntfy
     host: "ntfy.{{ caddy_domain }}"
     backend: "https://ntfy.tail8d86e.ts.net"
+  - name: ollama
+    host: "ollama.{{ caddy_domain }}"
+    backend: "https://ollama.tail8d86e.ts.net"
   - name: sifaka
     host: "nas.{{ caddy_domain }}"
     backend: "http://sifaka:5000"
diff --git a/argocd/apps/ollama.yaml b/argocd/apps/ollama.yaml
new file mode 100644
index 0000000..bb7a6a9
--- /dev/null
+++ b/argocd/apps/ollama.yaml
@@ -0,0 +1,18 @@
+---
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: ollama
+  namespace: argocd
+spec:
+  project: default
+  source:
+    repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git
+    targetRevision: main
+    path: argocd/manifests/ollama
+  destination:
+    server: https://ringtail.tail8d86e.ts.net:6443
+    namespace: ollama
+  syncPolicy:
+    syncOptions:
+      - CreateNamespace=true
diff --git a/argocd/manifests/nvidia-device-plugin/daemonset.yaml b/argocd/manifests/nvidia-device-plugin/daemonset.yaml
index 4c57a76..b484959 100644
--- a/argocd/manifests/nvidia-device-plugin/daemonset.yaml
+++ b/argocd/manifests/nvidia-device-plugin/daemonset.yaml
@@ -25,6 +25,7 @@ spec:
           image: nvcr.io/nvidia/k8s-device-plugin
           args:
             - --device-id-strategy=index
+            - --config-file=/config/config.yaml
           env:
             - name: LD_LIBRARY_PATH
               value: /run/nvidia/lib
@@ -39,6 +40,9 @@ spec:
             - name: nvidia-libs
               mountPath: /run/nvidia/lib
               readOnly: true
+            - name: plugin-config
+              mountPath: /config
+              readOnly: true
       volumes:
         - name: device-plugins
           hostPath:
@@ -49,3 +53,6 @@ spec:
         - name: nvidia-libs
           hostPath:
             path: /etc/nvidia-driver/lib
+        - name: plugin-config
+          configMap:
+            name: nvidia-device-plugin-config
diff --git a/argocd/manifests/nvidia-device-plugin/kustomization.yaml b/argocd/manifests/nvidia-device-plugin/kustomization.yaml
index 4ffe2d9..102127f 100644
--- a/argocd/manifests/nvidia-device-plugin/kustomization.yaml
+++ b/argocd/manifests/nvidia-device-plugin/kustomization.yaml
@@ -6,6 +6,7 @@ namespace: nvidia-device-plugin
 resources:
   - daemonset.yaml
   - runtime-class.yaml
+  - time-slicing-config.yaml
 
 images:
   - name: nvcr.io/nvidia/k8s-device-plugin
diff --git a/argocd/manifests/nvidia-device-plugin/time-slicing-config.yaml b/argocd/manifests/nvidia-device-plugin/time-slicing-config.yaml
new file mode 100644
index 0000000..dee2fd7
--- /dev/null
+++ b/argocd/manifests/nvidia-device-plugin/time-slicing-config.yaml
@@ -0,0 +1,14 @@
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: nvidia-device-plugin-config
+  namespace: nvidia-device-plugin
+data:
+  config.yaml: |
+    version: v1
+    sharing:
+      timeSlicing:
+        resources:
+          - name: nvidia.com/gpu
+            replicas: 2
diff --git a/argocd/manifests/ollama/deployment.yaml b/argocd/manifests/ollama/deployment.yaml
new file mode 100644
index 0000000..2b68e55
--- /dev/null
+++ b/argocd/manifests/ollama/deployment.yaml
@@ -0,0 +1,84 @@
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: ollama
+  namespace: ollama
+spec:
+  replicas: 1
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      app: ollama
+  template:
+    metadata:
+      labels:
+        app: ollama
+    spec:
+      runtimeClassName: nvidia
+      containers:
+        - name: ollama
+          image: ollama/ollama
+          ports:
+            - containerPort: 11434
+              name: http
+          env:
+            - name: OLLAMA_MODELS
+              value: /models
+            - name: OLLAMA_HOST
+              value: "0.0.0.0:11434"
+          volumeMounts:
+            - name: models
+              mountPath: /models
+          resources:
+            requests:
+              memory: "512Mi"
+              cpu: "500m"
+            limits:
+              memory: "16Gi"
+              cpu: "4000m"
+              nvidia.com/gpu: "1"
+          livenessProbe:
+            httpGet:
+              path: /api/tags
+              port: 11434
+            initialDelaySeconds: 30
+            periodSeconds: 30
+          readinessProbe:
+            httpGet:
+              path: /api/tags
+              port: 11434
+            initialDelaySeconds: 10
+            periodSeconds: 10
+        - name: model-sync
+          image: ollama/ollama
+          command: ["/bin/bash", "/scripts/sync-models.sh"]
+          env:
+            - name: MODEL_LIST
+              value: /config/models.txt
+            - name: OLLAMA_HOST
+              value: "http://localhost:11434"
+          volumeMounts:
+            - name: models-config
+              mountPath: /config
+            - name: sync-script
+              mountPath: /scripts
+          resources:
+            requests:
+              memory: "64Mi"
+              cpu: "50m"
+            limits:
+              memory: "256Mi"
+              cpu: "200m"
+      volumes:
+        - name: models
+          persistentVolumeClaim:
+            claimName: ollama-models
+        - name: models-config
+          configMap:
+            name: ollama-models
+        - name: sync-script
+          configMap:
+            name: ollama-sync-script
+            defaultMode: 0755  # yamllint disable-line rule:octal-values
diff --git a/argocd/manifests/ollama/ingress-tailscale.yaml b/argocd/manifests/ollama/ingress-tailscale.yaml
new file mode 100644
index 0000000..bada466
--- /dev/null
+++ b/argocd/manifests/ollama/ingress-tailscale.yaml
@@ -0,0 +1,26 @@
+---
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: ollama-tailscale
+  namespace: ollama
+  annotations:
+    tailscale.com/proxy-class: "default"
+    tailscale.com/proxy-group: "ingress"
+    gethomepage.dev/enabled: "true"
+    gethomepage.dev/name: "Ollama"
+    gethomepage.dev/group: "AI"
+    gethomepage.dev/icon: "ollama.png"
+    gethomepage.dev/description: "LLM inference server"
+    gethomepage.dev/href: "https://ollama.ops.eblu.me"
+    gethomepage.dev/pod-selector: "app=ollama"
+spec:
+  ingressClassName: tailscale
+  defaultBackend:
+    service:
+      name: ollama
+      port:
+        number: 11434
+  tls:
+    - hosts:
+        - ollama
diff --git a/argocd/manifests/ollama/kustomization.yaml b/argocd/manifests/ollama/kustomization.yaml
new file mode 100644
index 0000000..75add74
--- /dev/null
+++ b/argocd/manifests/ollama/kustomization.yaml
@@ -0,0 +1,22 @@
+---
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+namespace: ollama
+resources:
+  - pv-hostpath.yaml
+  - pvc.yaml
+  - deployment.yaml
+  - service.yaml
+  - ingress-tailscale.yaml
+
+images:
+  - name: ollama/ollama
+    newTag: "0.17.5"
+
+configMapGenerator:
+  - name: ollama-models
+    files:
+      - models.txt
+  - name: ollama-sync-script
+    files:
+      - sync-models.sh
diff --git a/argocd/manifests/ollama/models.txt b/argocd/manifests/ollama/models.txt
new file mode 100644
index 0000000..a998019
--- /dev/null
+++ b/argocd/manifests/ollama/models.txt
@@ -0,0 +1,6 @@
+# Models to pull from Ollama registry
+# One model per line. Comments with #.
+qwen2.5:14b
+deepseek-r1:14b
+phi4:14b
+gemma3:12b
diff --git a/argocd/manifests/ollama/pv-hostpath.yaml b/argocd/manifests/ollama/pv-hostpath.yaml
new file mode 100644
index 0000000..d25dbcc
--- /dev/null
+++ b/argocd/manifests/ollama/pv-hostpath.yaml
@@ -0,0 +1,15 @@
+---
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  name: ollama-models-pv
+spec:
+  capacity:
+    storage: 200Gi
+  accessModes:
+    - ReadWriteOnce
+  persistentVolumeReclaimPolicy: Retain
+  storageClassName: ""
+  hostPath:
+    path: /mnt/storage1/ollama
+    type: DirectoryOrCreate
diff --git a/argocd/manifests/ollama/pvc.yaml b/argocd/manifests/ollama/pvc.yaml
new file mode 100644
index 0000000..76c79a8
--- /dev/null
+++ b/argocd/manifests/ollama/pvc.yaml
@@ -0,0 +1,14 @@
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: ollama-models
+  namespace: ollama
+spec:
+  accessModes:
+    - ReadWriteOnce
+  storageClassName: ""
+  volumeName: ollama-models-pv
+  resources:
+    requests:
+      storage: 200Gi
diff --git a/argocd/manifests/ollama/service.yaml b/argocd/manifests/ollama/service.yaml
new file mode 100644
index 0000000..d9680e1
--- /dev/null
+++ b/argocd/manifests/ollama/service.yaml
@@ -0,0 +1,13 @@
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: ollama
+  namespace: ollama
+spec:
+  selector:
+    app: ollama
+  ports:
+    - name: http
+      port: 11434
+      targetPort: 11434
diff --git a/argocd/manifests/ollama/sync-models.sh b/argocd/manifests/ollama/sync-models.sh
new file mode 100644
--- /dev/null
+++ b/argocd/manifests/ollama/sync-models.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+# Sync models from ConfigMap to Ollama server
+# Runs as a sidecar in the ollama deployment, using the ollama CLI
+#
+# Environment:
+#   MODEL_LIST     path to the model list (one model per line, # comments)
+#   OLLAMA_HOST    address of the Ollama API the CLI talks to
+#   SYNC_INTERVAL  seconds to sleep between sync passes
+set -euo pipefail
+
+MODEL_LIST="${MODEL_LIST:-/config/models.txt}"
+OLLAMA_HOST="${OLLAMA_HOST:-http://localhost:11434}"
+SYNC_INTERVAL="${SYNC_INTERVAL:-1800}"
+
+export OLLAMA_HOST
+
+echo "Syncing models from ${MODEL_LIST} via ollama CLI (host: ${OLLAMA_HOST})"
+
+while true; do
+  # Wait for ollama server to be ready
+  echo "Waiting for Ollama API..."
+  max_attempts=60
+  attempt=0
+  until ollama list > /dev/null 2>&1; do
+    attempt=$((attempt + 1))
+    if [[ $attempt -ge $max_attempts ]]; then
+      echo "Ollama not ready after ${max_attempts} attempts, will retry next cycle"
+      sleep "$SYNC_INTERVAL"
+      # Restart the outer "while true" loop, not just this wait loop
+      continue 2
+    fi
+    sleep 5
+  done
+  echo "Ollama is ready"
+
+  # Get list of currently pulled models (first column, header row dropped)
+  current=$(ollama list 2>/dev/null | tail -n +2 | awk '{print $1}' || true)
+
+  pulled=0
+  skipped=0
+
+  # "|| [[ -n ... ]]" still processes a final line that lacks a newline
+  while IFS= read -r model || [[ -n "$model" ]]; do
+    # Strip comments (whole-line or trailing) and surrounding whitespace
+    model="${model%%#*}"
+    model=$(echo "$model" | xargs)
+    [[ -z "$model" ]] && continue
+
+    # ollama list shows name:tag, so compare against the fully tagged name;
+    # an entry without a tag refers to the default "latest" tag
+    wanted="$model"
+    [[ "${wanted##*/}" == *:* ]] || wanted="${wanted}:latest"
+
+    # Match the whole line: a substring match would treat "phi4:14b" as
+    # present when only a longer name such as "phi4:14b-q8_0" was pulled
+    if grep -qxF -- "$wanted" <<< "$current"; then
+      echo "Already present: $model"
+      ((skipped++)) || true
+    else
+      echo "Pulling: $model"
+      if ollama pull "$model"; then
+        echo "Pulled: $model"
+        ((pulled++)) || true
+      else
+        echo "Warning: Failed to pull $model" >&2
+      fi
+    fi
+  done < "$MODEL_LIST"
+
+  echo "Sync complete: $pulled pulled, $skipped already present"
+  echo "Next sync in ${SYNC_INTERVAL}s"
+  sleep "$SYNC_INTERVAL"
+done
diff --git a/docs/changelog.d/feature-ollama-ringtail.feature.md b/docs/changelog.d/feature-ollama-ringtail.feature.md
new file mode 100644
index 0000000..648757e
--- /dev/null
+++ b/docs/changelog.d/feature-ollama-ringtail.feature.md
@@ -0,0 +1 @@
+Deploy Ollama LLM server on ringtail with GPU acceleration and declarative model management
diff --git a/service-versions.yaml b/service-versions.yaml
index c1c48e1..00e1084 100644
--- a/service-versions.yaml
+++ b/service-versions.yaml
@@ -135,6 +135,13 @@ services:
     current-version: "2026.2.0"
     upstream-source: https://github.com/goauthentik/authentik/releases
 
+  - name: ollama
+    type: argocd
+    last-reviewed: "2026-03-02"
+    current-version: "0.17.5"
+    upstream-source: https://github.com/ollama/ollama/releases
+    notes: LLM inference server on ringtail (GPU); upstream container image
+
   - name: navidrome
     type: argocd
     last-reviewed: 2026-03-02