From 6d4929a66cf150ed251c1155b705ddc86d3d9aad Mon Sep 17 00:00:00 2001
From: Erich Blume <blume.erich@gmail.com>
Date: Wed, 11 Mar 2026 18:55:51 -0700
Subject: [PATCH] Add qwen3.5:27b to Ollama and bump memory limit to 22Gi

The 27B Q4_K_M model is ~17 GB, which exceeds the 16 GB of VRAM on the
RTX 4080 by ~1 GB. Ollama will offload a few layers to CPU RAM, so the
pod memory limit needs headroom beyond the previous 16Gi; raise it to
22Gi.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 argocd/manifests/ollama/deployment.yaml | 2 +-
 argocd/manifests/ollama/models.txt      | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/argocd/manifests/ollama/deployment.yaml b/argocd/manifests/ollama/deployment.yaml
index 65f17c6..6d02ca3 100644
--- a/argocd/manifests/ollama/deployment.yaml
+++ b/argocd/manifests/ollama/deployment.yaml
@@ -40,7 +40,7 @@ spec:
               memory: "512Mi"
               cpu: "500m"
             limits:
-              memory: "16Gi"
+              memory: "22Gi"
               cpu: "4000m"
               nvidia.com/gpu: "1"
           livenessProbe:
diff --git a/argocd/manifests/ollama/models.txt b/argocd/manifests/ollama/models.txt
index dac83c5..856618d 100644
--- a/argocd/manifests/ollama/models.txt
+++ b/argocd/manifests/ollama/models.txt
@@ -5,3 +5,4 @@ deepseek-r1:14b
 phi4:14b
 gemma3:12b
 qwen3.5:9b
+qwen3.5:27b