---
# Ollama inference server: a single GPU-backed replica plus a `model-sync`
# sidecar that runs /scripts/sync-models.sh against the server over localhost.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ollama
  namespace: ollama
spec:
  replicas: 1
  strategy:
    # Recreate stops the old pod before the new one starts.
    # NOTE(review): presumably required because the single GPU and the models
    # PVC cannot be held by two pods at once - confirm the PVC access mode.
    type: Recreate
  selector:
    matchLabels:
      app: ollama
  template:
    metadata:
      labels:
        app: ollama
    spec:
      # Run the pod under the "nvidia" RuntimeClass (must exist in the cluster).
      runtimeClassName: nvidia
      securityContext:
        seccompProfile:
          type: RuntimeDefault
      containers:
        - name: ollama
          # NOTE(review): the "kustomized" tag looks like a placeholder that a
          # Kustomize `images` transformer rewrites - confirm in the overlay.
          image: ollama/ollama:kustomized
          securityContext:
            # Block setuid/no_new_privs escalation inside the container.
            allowPrivilegeEscalation: false
          ports:
            - containerPort: 11434
              name: http
          env:
            # Points at the mount path of the `models` volume below.
            - name: OLLAMA_MODELS
              value: /models
            # Listen on all interfaces; the port matches containerPort `http`.
            - name: OLLAMA_HOST
              value: "0.0.0.0:11434"
            # Env values must be strings, hence the quoted digits.
            - name: OLLAMA_MAX_LOADED_MODELS
              value: "1"
            - name: OLLAMA_NUM_PARALLEL
              value: "1"
            - name: OLLAMA_FLASH_ATTENTION
              value: "1"
          volumeMounts:
            - name: models
              mountPath: /models
          resources:
            # NOTE(review): the memory request (512Mi) is far below the limit
            # (24Gi), so the scheduler only guarantees 512Mi on the chosen
            # node - confirm this gap is intentional.
            requests:
              memory: "512Mi"
              cpu: "500m"
            limits:
              memory: "24Gi"
              cpu: "4000m"
              nvidia.com/gpu: "1"
          # Both probes reference the named port so they cannot drift from
          # `containerPort`, and set an explicit timeout instead of relying on
          # the 1s default, which a busy server can exceed.
          livenessProbe:
            httpGet:
              path: /api/tags
              port: http
            initialDelaySeconds: 30
            periodSeconds: 30
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /api/tags
              port: http
            initialDelaySeconds: 10
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
        # Sidecar that runs the sync script from the `sync-script` ConfigMap.
        # It talks to the server container over the pod's loopback interface,
        # so it does not mount the `models` volume itself.
        # NOTE(review): as a regular container it is restarted whenever the
        # script exits - confirm the script is meant to run indefinitely.
        - name: model-sync
          image: ollama/ollama:kustomized
          securityContext:
            allowPrivilegeEscalation: false
          # The script is passed to bash explicitly, so it runs regardless of
          # the file's execute bit.
          command: ["/bin/bash", "/scripts/sync-models.sh"]
          env:
            # File provided by the `models-config` ConfigMap mount.
            - name: MODEL_LIST
              value: /config/models.txt
            - name: OLLAMA_HOST
              value: "http://localhost:11434"
          volumeMounts:
            - name: models-config
              mountPath: /config
              readOnly: true
            - name: sync-script
              mountPath: /scripts
              readOnly: true
          resources:
            requests:
              memory: "64Mi"
              cpu: "50m"
            limits:
              memory: "256Mi"
              cpu: "200m"
      volumes:
        # Persistent model store mounted at /models in the server container.
        - name: models
          persistentVolumeClaim:
            claimName: ollama-models
        # ConfigMap `ollama-models` (same name as the PVC, different kind).
        - name: models-config
          configMap:
            name: ollama-models
        - name: sync-script
          configMap:
            name: ollama-sync-script
            # Must remain an unquoted integer (the field is an int32, not a
            # string). Octal 0755 = rwxr-xr-x = decimal 493.
            defaultMode: 0755