From 6d4929a66cf150ed251c1155b705ddc86d3d9aad Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Wed, 11 Mar 2026 18:55:51 -0700 Subject: [PATCH] Add qwen3.5:27b to Ollama and bump memory limit to 22Gi The 27B Q4_K_M model is ~17 GB, exceeding the 16 GB VRAM on the RTX 4080 by ~1 GB. Ollama will offload a few layers to CPU RAM, so the pod memory limit needs headroom beyond the previous 16Gi. Co-Authored-By: Claude Opus 4.6 --- argocd/manifests/ollama/deployment.yaml | 2 +- argocd/manifests/ollama/models.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/argocd/manifests/ollama/deployment.yaml b/argocd/manifests/ollama/deployment.yaml index 65f17c6..6d02ca3 100644 --- a/argocd/manifests/ollama/deployment.yaml +++ b/argocd/manifests/ollama/deployment.yaml @@ -40,7 +40,7 @@ spec: memory: "512Mi" cpu: "500m" limits: - memory: "16Gi" + memory: "22Gi" cpu: "4000m" nvidia.com/gpu: "1" livenessProbe: diff --git a/argocd/manifests/ollama/models.txt b/argocd/manifests/ollama/models.txt index dac83c5..856618d 100644 --- a/argocd/manifests/ollama/models.txt +++ b/argocd/manifests/ollama/models.txt @@ -5,3 +5,4 @@ deepseek-r1:14b phi4:14b gemma3:12b qwen3.5:9b +qwen3.5:27b