From 919f926241c0e3c2e03902c1601e5f742ec7810a Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Tue, 20 Jan 2026 21:06:53 -0800 Subject: [PATCH 01/16] P5.1: Update minikube role for QEMU2 driver - Change minikube driver from podman to qemu2 - Change container runtime from cri-o to containerd - Add qemu installation to minikube role - Remove podman role from indri.yml playbook - Update handlers for containerd instead of cri-o - Temporarily disable registry mirror config (needs containerd format) - Add k8s-storage synology user creation steps to P5.1 doc - Add post-migration tasks for zot registry mirror reconfiguration Co-Authored-By: Claude Opus 4.5 --- ansible/playbooks/indri.yml | 2 - ansible/roles/minikube/defaults/main.yml | 10 +-- ansible/roles/minikube/handlers/main.yml | 4 +- ansible/roles/minikube/tasks/main.yml | 75 +++++++-------------- plans/k8s-migration/P5.1_qemu2_migration.md | 33 +++++++++ 5 files changed, 62 insertions(+), 62 deletions(-) diff --git a/ansible/playbooks/indri.yml b/ansible/playbooks/indri.yml index e001da5..5418bc0 100644 --- a/ansible/playbooks/indri.yml +++ b/ansible/playbooks/indri.yml @@ -47,8 +47,6 @@ tags: zot - role: zot_metrics tags: zot_metrics - - role: podman - tags: podman - role: minikube tags: minikube - role: minikube_metrics diff --git a/ansible/roles/minikube/defaults/main.yml b/ansible/roles/minikube/defaults/main.yml index 0b2921d..cd53839 100644 --- a/ansible/roles/minikube/defaults/main.yml +++ b/ansible/roles/minikube/defaults/main.yml @@ -1,11 +1,10 @@ --- # Minikube cluster configuration minikube_cpus: 4 -# Note: Must be less than podman machine memory (8192MB) to account for overhead -minikube_memory: 7800 +minikube_memory: 8192 minikube_disk_size: "200g" -minikube_driver: podman -minikube_container_runtime: cri-o +minikube_driver: qemu2 +minikube_container_runtime: containerd # Remote access configuration # These allow kubectl from other machines (e.g., gilbert) to connect @@ -13,8 +12,5 @@ 
minikube_container_runtime: cri-o minikube_apiserver_names: - k8s.tail8d86e.ts.net - indri -# Note: apiserver_port is the INTERNAL container port; with podman driver, -# the host port is dynamically assigned. Check actual port with: -# kubectl config view --minify -o jsonpath="{.clusters[0].cluster.server}" minikube_apiserver_port: 6443 minikube_listen_address: "0.0.0.0" diff --git a/ansible/roles/minikube/handlers/main.yml b/ansible/roles/minikube/handlers/main.yml index b609e52..44ad747 100644 --- a/ansible/roles/minikube/handlers/main.yml +++ b/ansible/roles/minikube/handlers/main.yml @@ -8,7 +8,7 @@ minikube start changed_when: true -- name: Restart CRI-O in minikube +- name: Restart containerd in minikube ansible.builtin.command: - cmd: minikube ssh --native-ssh=false "sudo systemctl restart crio" + cmd: minikube ssh --native-ssh=false "sudo systemctl restart containerd" changed_when: true diff --git a/ansible/roles/minikube/tasks/main.yml b/ansible/roles/minikube/tasks/main.yml index 64e0019..2eb8e59 100644 --- a/ansible/roles/minikube/tasks/main.yml +++ b/ansible/roles/minikube/tasks/main.yml @@ -1,14 +1,19 @@ --- # Minikube installation and cluster setup for indri -# Requires podman machine to be running (see podman role) +# Uses qemu2 driver for full VM with kernel mount capabilities (NFS, SMB, etc.) # -# NOTE: Similar to podman, minikube start may have issues when run via SSH. +# NOTE: minikube start may have issues when run via SSH. 
# If cluster fails to start, manually run on indri: -# minikube start --driver=podman --container-runtime=cri-o \ -# --cpus=4 --memory=7800 --disk-size=200g \ +# minikube start --driver=qemu2 --container-runtime=containerd \ +# --cpus=4 --memory=8192 --disk-size=200g \ # --apiserver-names=k8s.tail8d86e.ts.net --apiserver-names=indri \ # --apiserver-port=6443 --listen-address=0.0.0.0 +- name: Install qemu via homebrew (required for qemu2 driver) + community.general.homebrew: + name: qemu + state: present + - name: Install minikube via homebrew community.general.homebrew: name: minikube @@ -57,54 +62,22 @@ msg: "WARNING: minikube may not have started properly. Run 'minikube start' manually on indri if needed. Status: {{ minikube_final_status.stdout | default('unknown') }}" when: minikube_final_status.rc != 0 or 'Running' not in minikube_final_status.stdout -# Configure CRI-O to use zot as pull-through cache -- name: Read desired zot mirror config - ansible.builtin.slurp: - src: "{{ role_path }}/files/zot-mirror.conf" - register: minikube_desired_zot_config - delegate_to: localhost - when: minikube_final_status.rc == 0 and 'Running' in minikube_final_status.stdout - -- name: Check current zot mirror config in minikube +# Configure containerd to use zot as pull-through cache +# With qemu2 driver, host is accessible via host.minikube.internal +# Zot listens on indri:5050 (localhost:5050 from host perspective) +- name: Get host IP for registry mirror config ansible.builtin.command: - cmd: minikube ssh --native-ssh=false "cat /etc/containers/registries.conf.d/zot-mirror.conf 2>/dev/null || echo ''" - register: minikube_existing_zot_config + cmd: minikube ssh --native-ssh=false "getent hosts host.minikube.internal | awk '{print \$1}'" + register: minikube_host_ip changed_when: false + failed_when: false when: minikube_final_status.rc == 0 and 'Running' in minikube_final_status.stdout -- name: Determine if zot mirror config needs update - ansible.builtin.set_fact: - 
minikube_zot_config_changed: "{{ (minikube_existing_zot_config.stdout | trim) != (minikube_desired_zot_config.content | b64decode | trim) }}" - when: minikube_final_status.rc == 0 and 'Running' in minikube_final_status.stdout - -- name: Copy zot mirror config to temp location - ansible.builtin.copy: - src: zot-mirror.conf - dest: /tmp/zot-mirror.conf - mode: "0644" - when: - - minikube_final_status.rc == 0 - - "'Running' in minikube_final_status.stdout" - - minikube_zot_config_changed | default(false) - -- name: Apply zot mirror config to minikube - ansible.builtin.shell: - cmd: | - set -o pipefail - cat /tmp/zot-mirror.conf | minikube ssh --native-ssh=false "sudo tee /etc/containers/registries.conf.d/zot-mirror.conf > /dev/null" - executable: /bin/bash - changed_when: true # Task only runs when config needs updating - when: - - minikube_final_status.rc == 0 - - "'Running' in minikube_final_status.stdout" - - minikube_zot_config_changed | default(false) - notify: Restart CRI-O in minikube - -- name: Clean up temp config file - ansible.builtin.file: - path: /tmp/zot-mirror.conf - state: absent - when: - - minikube_final_status.rc == 0 - - "'Running' in minikube_final_status.stdout" - - minikube_zot_config_changed | default(false) +- name: Configure insecure registry for zot + ansible.builtin.command: + cmd: "minikube addons configure registry-creds" + changed_when: false + failed_when: false + when: false # TODO: Configure containerd registry mirrors after basic migration works + # For now, images will be pulled directly from public registries + # We can add zot mirror config later via containerd config or minikube addons diff --git a/plans/k8s-migration/P5.1_qemu2_migration.md b/plans/k8s-migration/P5.1_qemu2_migration.md index 73b47b8..032baea 100644 --- a/plans/k8s-migration/P5.1_qemu2_migration.md +++ b/plans/k8s-migration/P5.1_qemu2_migration.md @@ -45,6 +45,26 @@ QEMU2 creates an actual VM (not a container), which has: --- +## Prerequisites (Manual Steps) + 
+### Create Synology User for Kubernetes Storage Access + +Create a dedicated Synology user for k8s NFS/SMB access (do not use personal account): + +On Synology DSM (Control Panel → User & Group): +1. Create new user: `k8s-storage` + - Set a strong password + - No admin privileges needed + - Deny access to all applications (only needs file services) +2. Set permissions on the `torrents` share: + - Give `k8s-storage` user Read/Write access +3. Store credentials in 1Password: + - Vault: `vg6xf6vvfmoh5hqjjhlhbeoaie` (blumeops vault) + - Item name: `synology-k8s-storage` + - Fields: `username` (k8s-storage), `password` + +--- + ## Plan ### 1. Export Current State @@ -233,3 +253,16 @@ All state is in git, so cluster recreation is straightforward. - First boot may be slower due to VM initialization - socket_vmnet provides better networking but requires sudo setup - Consider creating a LaunchAgent for `minikube mount` if using that approach + +## Post-Migration Tasks + +### Zot Registry Mirror (TODO) + +The CRI-O registry mirror config (`zot-mirror.conf`) is not compatible with containerd. After the basic QEMU2 migration is verified working, we need to: + +1. **Reconfigure zot mirrors for containerd** - containerd uses a different config format at `/etc/containerd/config.toml` +2. **Update host address** - `host.containers.internal` (podman-specific) won't work; use `host.minikube.internal` or the host IP instead +3. **Test registry caching** - verify images are being cached through zot +4. **Update ansible role** - add containerd-specific registry mirror configuration + +For now, images will be pulled directly from public registries (docker.io, ghcr.io, etc.) which is fine for the migration but loses the caching benefits. 
-- 2.50.1 (Apple Git-155) From 0474962e89893d9498a3595acf96ee94ade29e32 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Tue, 20 Jan 2026 21:20:04 -0800 Subject: [PATCH 02/16] Increase minikube resources to 6 CPUs and 12GB RAM Co-Authored-By: Claude Opus 4.5 --- ansible/roles/minikube/defaults/main.yml | 4 ++-- ansible/roles/minikube/tasks/main.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ansible/roles/minikube/defaults/main.yml b/ansible/roles/minikube/defaults/main.yml index cd53839..f42a416 100644 --- a/ansible/roles/minikube/defaults/main.yml +++ b/ansible/roles/minikube/defaults/main.yml @@ -1,7 +1,7 @@ --- # Minikube cluster configuration -minikube_cpus: 4 -minikube_memory: 8192 +minikube_cpus: 6 +minikube_memory: 12288 minikube_disk_size: "200g" minikube_driver: qemu2 minikube_container_runtime: containerd diff --git a/ansible/roles/minikube/tasks/main.yml b/ansible/roles/minikube/tasks/main.yml index 2eb8e59..36965a7 100644 --- a/ansible/roles/minikube/tasks/main.yml +++ b/ansible/roles/minikube/tasks/main.yml @@ -5,7 +5,7 @@ # NOTE: minikube start may have issues when run via SSH. 
# If cluster fails to start, manually run on indri: # minikube start --driver=qemu2 --container-runtime=containerd \ -# --cpus=4 --memory=8192 --disk-size=200g \ +# --cpus=6 --memory=12288 --disk-size=200g \ # --apiserver-names=k8s.tail8d86e.ts.net --apiserver-names=indri \ # --apiserver-port=6443 --listen-address=0.0.0.0 -- 2.50.1 (Apple Git-155) From 4b2c1a346f696749a5e7dcd1947e3255c26bba32 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Tue, 20 Jan 2026 21:41:47 -0800 Subject: [PATCH 03/16] Add socket_vmnet for proper qemu2 networking - Install socket_vmnet via homebrew - Start socket_vmnet service (requires sudo) - Add --network=socket_vmnet to minikube start Co-Authored-By: Claude Opus 4.5 --- ansible/roles/minikube/defaults/main.yml | 1 + ansible/roles/minikube/tasks/main.yml | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/ansible/roles/minikube/defaults/main.yml b/ansible/roles/minikube/defaults/main.yml index f42a416..e753f38 100644 --- a/ansible/roles/minikube/defaults/main.yml +++ b/ansible/roles/minikube/defaults/main.yml @@ -4,6 +4,7 @@ minikube_cpus: 6 minikube_memory: 12288 minikube_disk_size: "200g" minikube_driver: qemu2 +minikube_network: socket_vmnet minikube_container_runtime: containerd # Remote access configuration diff --git a/ansible/roles/minikube/tasks/main.yml b/ansible/roles/minikube/tasks/main.yml index 36965a7..aca060f 100644 --- a/ansible/roles/minikube/tasks/main.yml +++ b/ansible/roles/minikube/tasks/main.yml @@ -1,10 +1,11 @@ --- # Minikube installation and cluster setup for indri # Uses qemu2 driver for full VM with kernel mount capabilities (NFS, SMB, etc.) +# Requires socket_vmnet for proper networking (minikube service/tunnel commands) # # NOTE: minikube start may have issues when run via SSH. 
# If cluster fails to start, manually run on indri: -# minikube start --driver=qemu2 --container-runtime=containerd \ +# minikube start --driver=qemu2 --network=socket_vmnet --container-runtime=containerd \ # --cpus=6 --memory=12288 --disk-size=200g \ # --apiserver-names=k8s.tail8d86e.ts.net --apiserver-names=indri \ # --apiserver-port=6443 --listen-address=0.0.0.0 @@ -14,6 +15,19 @@ name: qemu state: present +- name: Install socket_vmnet via homebrew (required for qemu2 networking) + community.general.homebrew: + name: socket_vmnet + state: present + +- name: Start socket_vmnet service + ansible.builtin.command: + cmd: brew services start socket_vmnet + become: true + register: minikube_socket_vmnet_start + changed_when: "'Successfully started' in minikube_socket_vmnet_start.stdout" + failed_when: false + - name: Install minikube via homebrew community.general.homebrew: name: minikube @@ -36,6 +50,7 @@ cmd: > minikube start --driver={{ minikube_driver }} + --network={{ minikube_network }} --container-runtime={{ minikube_container_runtime }} --cpus={{ minikube_cpus }} --memory={{ minikube_memory }} -- 2.50.1 (Apple Git-155) From 26ec02e1be22213c3b087b468a659a0771b75be2 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Wed, 21 Jan 2026 08:03:21 -0800 Subject: [PATCH 04/16] P5.1: Add VM config to ansible role, mark phase complete - Add hosts file entry for registry.tail8d86e.ts.net in VM - Configure containerd registry mirror to use local zot - Update P5.1 doc with implementation notes and manual steps - Mark P5.1 as complete Manual steps still required after cluster creation: 1. sudo brew services start socket_vmnet (once per reboot) 2. sudo mount -t nfs sifaka:/volume1/torrents /Volumes/torrents-nfs 3. 
minikube mount /Volumes/torrents-nfs:/mnt/torrents (GUI session) Co-Authored-By: Claude Opus 4.5 --- ansible/roles/minikube/tasks/main.yml | 38 +++++++++------ plans/k8s-migration/P5.1_qemu2_migration.md | 52 +++++++++++++++++---- 2 files changed, 67 insertions(+), 23 deletions(-) diff --git a/ansible/roles/minikube/tasks/main.yml b/ansible/roles/minikube/tasks/main.yml index aca060f..d704ba0 100644 --- a/ansible/roles/minikube/tasks/main.yml +++ b/ansible/roles/minikube/tasks/main.yml @@ -77,22 +77,32 @@ msg: "WARNING: minikube may not have started properly. Run 'minikube start' manually on indri if needed. Status: {{ minikube_final_status.stdout | default('unknown') }}" when: minikube_final_status.rc != 0 or 'Running' not in minikube_final_status.stdout -# Configure containerd to use zot as pull-through cache -# With qemu2 driver, host is accessible via host.minikube.internal -# Zot listens on indri:5050 (localhost:5050 from host perspective) -- name: Get host IP for registry mirror config +# Configure VM to access zot registry on host +# The VM can't resolve Tailscale hostnames, so we add a hosts entry +# and configure containerd to use the local zot instance +- name: Add registry hostname to VM hosts file ansible.builtin.command: - cmd: minikube ssh --native-ssh=false "getent hosts host.minikube.internal | awk '{print \$1}'" - register: minikube_host_ip - changed_when: false - failed_when: false + cmd: minikube ssh --native-ssh=false "grep -q 'registry.tail8d86e.ts.net' /etc/hosts || echo '192.168.105.1 registry.tail8d86e.ts.net' | sudo tee -a /etc/hosts" + register: minikube_hosts_entry + changed_when: "'registry.tail8d86e.ts.net' in minikube_hosts_entry.stdout" when: minikube_final_status.rc == 0 and 'Running' in minikube_final_status.stdout -- name: Configure insecure registry for zot +- name: Create containerd registry mirror directory ansible.builtin.command: - cmd: "minikube addons configure registry-creds" + cmd: minikube ssh --native-ssh=false "sudo 
mkdir -p /etc/containerd/certs.d/registry.tail8d86e.ts.net" + register: minikube_registry_dir changed_when: false - failed_when: false - when: false # TODO: Configure containerd registry mirrors after basic migration works - # For now, images will be pulled directly from public registries - # We can add zot mirror config later via containerd config or minikube addons + when: minikube_final_status.rc == 0 and 'Running' in minikube_final_status.stdout + +- name: Configure containerd registry mirror for zot + ansible.builtin.command: + cmd: | + minikube ssh --native-ssh=false 'echo "server = \"http://host.minikube.internal:5050\" + + [host.\"http://host.minikube.internal:5050\"] + capabilities = [\"pull\", \"resolve\"] + skip_verify = true" | sudo tee /etc/containerd/certs.d/registry.tail8d86e.ts.net/hosts.toml' + register: minikube_registry_config + changed_when: minikube_registry_config.rc == 0 + when: minikube_final_status.rc == 0 and 'Running' in minikube_final_status.stdout + notify: Restart containerd in minikube diff --git a/plans/k8s-migration/P5.1_qemu2_migration.md b/plans/k8s-migration/P5.1_qemu2_migration.md index 032baea..64c5be7 100644 --- a/plans/k8s-migration/P5.1_qemu2_migration.md +++ b/plans/k8s-migration/P5.1_qemu2_migration.md @@ -2,7 +2,7 @@ **Goal**: Replace the podman driver with qemu2 to enable proper volume mounts (hostPath, NFS, SMB CSI) -**Status**: Planning +**Status**: Complete (2026-01-21) **Prerequisites**: [Phase 5](P5_devpi.complete.md) complete @@ -254,15 +254,49 @@ All state is in git, so cluster recreation is straightforward. - socket_vmnet provides better networking but requires sudo setup - Consider creating a LaunchAgent for `minikube mount` if using that approach -## Post-Migration Tasks +## Implementation Notes (2026-01-21) -### Zot Registry Mirror (TODO) +### What Actually Worked -The CRI-O registry mirror config (`zot-mirror.conf`) is not compatible with containerd. 
After the basic QEMU2 migration is verified working, we need to: +**Volume mounting solution**: NFS mount on indri (host) + `minikube mount` to pass through to VM -1. **Reconfigure zot mirrors for containerd** - containerd uses a different config format at `/etc/containerd/config.toml` -2. **Update host address** - `host.containers.internal` (podman-specific) won't work; use `host.minikube.internal` or the host IP instead -3. **Test registry caching** - verify images are being cached through zot -4. **Update ansible role** - add containerd-specific registry mirror configuration +1. Mount sifaka's torrents share on indri via NFS: `sudo mount -t nfs sifaka:/volume1/torrents /Volumes/torrents-nfs` +2. Run `minikube mount /Volumes/torrents-nfs:/mnt/torrents` from indri console (GUI session required due to macOS security) +3. Pods can access `/mnt/torrents` via hostPath -For now, images will be pulled directly from public registries (docker.io, ghcr.io, etc.) which is fine for the migration but loses the caching benefits. +**Why NFS from inside VM didn't work**: Despite allowing 192.168.105.0/24 in Synology NFS settings, the VM got "access denied". Root cause unknown - may be Synology NFS quirk. + +**Why SMB didn't work**: The minikube containerd kernel doesn't include the CIFS module. + +### Zot Registry Mirror (Implemented) + +The ansible role now configures containerd to redirect `registry.tail8d86e.ts.net` to `host.minikube.internal:5050`: +- Adds hosts file entry in VM +- Creates containerd registry mirror config at `/etc/containerd/certs.d/registry.tail8d86e.ts.net/hosts.toml` + +### Manual Steps Still Required + +These steps cannot be fully automated via ansible and must be done manually: + +1. **socket_vmnet service (once per reboot)**: + ```bash + # On indri console: + sudo brew services start socket_vmnet + ``` + +2. 
**NFS mount on indri (once per reboot)**: + ```bash + # On indri console: + sudo mount -t nfs sifaka:/volume1/torrents /Volumes/torrents-nfs + ``` + +3. **minikube mount (must run in GUI session)**: + ```bash + # On indri console (not SSH - requires GUI session for macOS security): + minikube mount /Volumes/torrents-nfs:/mnt/torrents + # Keep this terminal open - the mount dies if process exits + ``` + +### TODO: LaunchAgent for Persistent Mount + +Create a LaunchAgent to run `minikube mount` at login. Challenge: must run in GUI session context for macOS security model. -- 2.50.1 (Apple Git-155) From 40376b635f52cdb29588fcf4cd57d0f7a31997d8 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Wed, 21 Jan 2026 08:22:53 -0800 Subject: [PATCH 05/16] Add LaunchDaemon/LaunchAgent for persistent NFS and minikube mounts - LaunchDaemon: mounts sifaka:/volume1/torrents to /Volumes/torrents-nfs at boot - LaunchAgent: runs minikube mount to pass through to /mnt/torrents in VM - Handlers to load both services when plist files change Co-Authored-By: Claude Opus 4.5 --- .../files/com.blumeops.minikube-mount.plist | 41 +++++++++++++++++++ .../files/com.blumeops.nfs-torrents.plist | 24 +++++++++++ ansible/roles/minikube/handlers/main.yml | 13 ++++++ ansible/roles/minikube/tasks/main.yml | 28 +++++++++++++ 4 files changed, 106 insertions(+) create mode 100644 ansible/roles/minikube/files/com.blumeops.minikube-mount.plist create mode 100644 ansible/roles/minikube/files/com.blumeops.nfs-torrents.plist diff --git a/ansible/roles/minikube/files/com.blumeops.minikube-mount.plist b/ansible/roles/minikube/files/com.blumeops.minikube-mount.plist new file mode 100644 index 0000000..2313c52 --- /dev/null +++ b/ansible/roles/minikube/files/com.blumeops.minikube-mount.plist @@ -0,0 +1,41 @@ + + + + + Label + com.blumeops.minikube-mount + ProgramArguments + + /bin/bash + -c + +# Wait for minikube to be running +for i in {1..60}; do + if /opt/homebrew/bin/minikube status | grep -q "Running"; then + 
break + fi + sleep 5 +done + +# Wait for NFS mount to be available +for i in {1..30}; do + if mount | grep -q "/Volumes/torrents-nfs"; then + break + fi + sleep 2 +done + +# Start the mount (this blocks until killed) +exec /opt/homebrew/bin/minikube mount /Volumes/torrents-nfs:/mnt/torrents + + + RunAtLoad + + KeepAlive + + StandardErrorPath + /tmp/minikube-mount.err + StandardOutPath + /tmp/minikube-mount.log + + diff --git a/ansible/roles/minikube/files/com.blumeops.nfs-torrents.plist b/ansible/roles/minikube/files/com.blumeops.nfs-torrents.plist new file mode 100644 index 0000000..1cec7a6 --- /dev/null +++ b/ansible/roles/minikube/files/com.blumeops.nfs-torrents.plist @@ -0,0 +1,24 @@ + + + + + Label + com.blumeops.nfs-torrents + ProgramArguments + + /sbin/mount + -t + nfs + -o + resvport,rw + sifaka:/volume1/torrents + /Volumes/torrents-nfs + + RunAtLoad + + StandardErrorPath + /tmp/nfs-torrents.err + StandardOutPath + /tmp/nfs-torrents.log + + diff --git a/ansible/roles/minikube/handlers/main.yml b/ansible/roles/minikube/handlers/main.yml index 44ad747..8af6513 100644 --- a/ansible/roles/minikube/handlers/main.yml +++ b/ansible/roles/minikube/handlers/main.yml @@ -12,3 +12,16 @@ ansible.builtin.command: cmd: minikube ssh --native-ssh=false "sudo systemctl restart containerd" changed_when: true + +- name: Load NFS mount LaunchDaemon + ansible.builtin.command: + cmd: launchctl load /Library/LaunchDaemons/com.blumeops.nfs-torrents.plist + become: true + failed_when: false + changed_when: true + +- name: Load minikube mount LaunchAgent + ansible.builtin.command: + cmd: launchctl load {{ ansible_env.HOME }}/Library/LaunchAgents/com.blumeops.minikube-mount.plist + failed_when: false + changed_when: true diff --git a/ansible/roles/minikube/tasks/main.yml b/ansible/roles/minikube/tasks/main.yml index d704ba0..fec9a50 100644 --- a/ansible/roles/minikube/tasks/main.yml +++ b/ansible/roles/minikube/tasks/main.yml @@ -106,3 +106,31 @@ changed_when: 
minikube_registry_config.rc == 0 when: minikube_final_status.rc == 0 and 'Running' in minikube_final_status.stdout notify: Restart containerd in minikube + +# Set up persistent NFS mount from sifaka and minikube mount passthrough +# NFS mount uses LaunchDaemon (runs as root at boot) +# Minikube mount uses LaunchAgent (runs in user GUI session at login) + +- name: Create NFS mount point + ansible.builtin.file: + path: /Volumes/torrents-nfs + state: directory + mode: "0755" + become: true + +- name: Install NFS mount LaunchDaemon + ansible.builtin.copy: + src: com.blumeops.nfs-torrents.plist + dest: /Library/LaunchDaemons/com.blumeops.nfs-torrents.plist + owner: root + group: wheel + mode: "0644" + become: true + notify: Load NFS mount LaunchDaemon + +- name: Install minikube mount LaunchAgent + ansible.builtin.copy: + src: com.blumeops.minikube-mount.plist + dest: "{{ ansible_env.HOME }}/Library/LaunchAgents/com.blumeops.minikube-mount.plist" + mode: "0644" + notify: Load minikube mount LaunchAgent -- 2.50.1 (Apple Git-155) From b096df4c71825832036ccdee229bab24df121ec0 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Wed, 21 Jan 2026 11:24:44 -0800 Subject: [PATCH 06/16] Fix ansible idempotency and document macOS network permission - Check containerd registry config before writing to avoid unnecessary changes - Fix ansible_env deprecation warnings (use ansible_facts['env']) - Document macOS network permission popup for minikube mount - Document passwordless sudo configuration for indri - Add checks to skip sudo tasks when state already matches Co-Authored-By: Claude Opus 4.5 --- ansible/roles/minikube/handlers/main.yml | 2 +- ansible/roles/minikube/tasks/main.yml | 38 +++++++++++++++++++-- plans/k8s-migration/P5.1_qemu2_migration.md | 14 ++++++++ 3 files changed, 50 insertions(+), 4 deletions(-) diff --git a/ansible/roles/minikube/handlers/main.yml b/ansible/roles/minikube/handlers/main.yml index 8af6513..7d62a8b 100644 --- a/ansible/roles/minikube/handlers/main.yml 
+++ b/ansible/roles/minikube/handlers/main.yml @@ -22,6 +22,6 @@ - name: Load minikube mount LaunchAgent ansible.builtin.command: - cmd: launchctl load {{ ansible_env.HOME }}/Library/LaunchAgents/com.blumeops.minikube-mount.plist + cmd: launchctl load {{ ansible_facts['env']['HOME'] }}/Library/LaunchAgents/com.blumeops.minikube-mount.plist failed_when: false changed_when: true diff --git a/ansible/roles/minikube/tasks/main.yml b/ansible/roles/minikube/tasks/main.yml index fec9a50..a471d82 100644 --- a/ansible/roles/minikube/tasks/main.yml +++ b/ansible/roles/minikube/tasks/main.yml @@ -20,6 +20,13 @@ name: socket_vmnet state: present +- name: Check if socket_vmnet process is running + ansible.builtin.command: + cmd: pgrep socket_vmnet + register: minikube_socket_vmnet_status + changed_when: false + failed_when: false + - name: Start socket_vmnet service ansible.builtin.command: cmd: brew services start socket_vmnet @@ -27,6 +34,7 @@ register: minikube_socket_vmnet_start changed_when: "'Successfully started' in minikube_socket_vmnet_start.stdout" failed_when: false + when: minikube_socket_vmnet_status.rc != 0 - name: Install minikube via homebrew community.general.homebrew: @@ -94,6 +102,13 @@ changed_when: false when: minikube_final_status.rc == 0 and 'Running' in minikube_final_status.stdout +- name: Check containerd registry mirror config + ansible.builtin.command: + cmd: minikube ssh --native-ssh=false "cat /etc/containerd/certs.d/registry.tail8d86e.ts.net/hosts.toml 2>/dev/null || echo ''" + register: minikube_registry_config_current + changed_when: false + when: minikube_final_status.rc == 0 and 'Running' in minikube_final_status.stdout + - name: Configure containerd registry mirror for zot ansible.builtin.command: cmd: | @@ -103,13 +118,23 @@ capabilities = [\"pull\", \"resolve\"] skip_verify = true" | sudo tee /etc/containerd/certs.d/registry.tail8d86e.ts.net/hosts.toml' register: minikube_registry_config - changed_when: minikube_registry_config.rc == 0 - 
when: minikube_final_status.rc == 0 and 'Running' in minikube_final_status.stdout + changed_when: true + when: + - minikube_final_status.rc == 0 and 'Running' in minikube_final_status.stdout + - "'host.minikube.internal:5050' not in minikube_registry_config_current.stdout" notify: Restart containerd in minikube # Set up persistent NFS mount from sifaka and minikube mount passthrough # NFS mount uses LaunchDaemon (runs as root at boot) # Minikube mount uses LaunchAgent (runs in user GUI session at login) +# +# NOTE: Tasks with become:true require passwordless sudo on indri +# (configured via /etc/sudoers.d/erichblume) + +- name: Check if NFS mount point exists + ansible.builtin.stat: + path: /Volumes/torrents-nfs + register: minikube_nfs_mount_point - name: Create NFS mount point ansible.builtin.file: @@ -117,6 +142,12 @@ state: directory mode: "0755" become: true + when: not minikube_nfs_mount_point.stat.exists + +- name: Check if NFS LaunchDaemon is installed + ansible.builtin.stat: + path: /Library/LaunchDaemons/com.blumeops.nfs-torrents.plist + register: minikube_nfs_launchdaemon - name: Install NFS mount LaunchDaemon ansible.builtin.copy: @@ -127,10 +158,11 @@ mode: "0644" become: true notify: Load NFS mount LaunchDaemon + when: not minikube_nfs_launchdaemon.stat.exists - name: Install minikube mount LaunchAgent ansible.builtin.copy: src: com.blumeops.minikube-mount.plist - dest: "{{ ansible_env.HOME }}/Library/LaunchAgents/com.blumeops.minikube-mount.plist" + dest: "{{ ansible_facts['env']['HOME'] }}/Library/LaunchAgents/com.blumeops.minikube-mount.plist" mode: "0644" notify: Load minikube mount LaunchAgent diff --git a/plans/k8s-migration/P5.1_qemu2_migration.md b/plans/k8s-migration/P5.1_qemu2_migration.md index 64c5be7..d4156c7 100644 --- a/plans/k8s-migration/P5.1_qemu2_migration.md +++ b/plans/k8s-migration/P5.1_qemu2_migration.md @@ -274,6 +274,20 @@ The ansible role now configures containerd to redirect `registry.tail8d86e.ts.ne - Adds hosts file entry 
in VM - Creates containerd registry mirror config at `/etc/containerd/certs.d/registry.tail8d86e.ts.net/hosts.toml` +### Passwordless Sudo on Indri + +Configured passwordless sudo for `erichblume` user to allow ansible `become: true` tasks to run without `-K` flag: + +```bash +echo "erichblume ALL=(ALL) NOPASSWD: ALL" | sudo tee /etc/sudoers.d/erichblume +``` + +This is acceptable given the security model - tailnet access is the trust boundary. + +### macOS Network Permission + +The first time `minikube mount` runs, macOS will show a GUI popup asking to allow network access. This must be approved from the indri console (not SSH). Once approved, subsequent runs won't prompt. + ### Manual Steps Still Required These steps cannot be fully automated via ansible and must be done manually: -- 2.50.1 (Apple Git-155) From 2c28a3fc540910fbf6a59eb4abcb13fbd9a914f8 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Wed, 21 Jan 2026 11:45:31 -0800 Subject: [PATCH 07/16] Update tailscale_serve for qemu2 API server address The k8s API server is now at 192.168.105.2:6443 (inside qemu2 VM) instead of localhost:44491 (old podman port mapping). Note: TCP passthrough via tailscale svc:k8s is configured but connection times out - may need admin console approval or debugging. 
Co-Authored-By: Claude Opus 4.5 --- ansible/roles/tailscale_serve/defaults/main.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ansible/roles/tailscale_serve/defaults/main.yml b/ansible/roles/tailscale_serve/defaults/main.yml index a7b437f..e9c5970 100644 --- a/ansible/roles/tailscale_serve/defaults/main.yml +++ b/ansible/roles/tailscale_serve/defaults/main.yml @@ -24,9 +24,8 @@ tailscale_serve_services: upstream: http://localhost:5050 # Kubernetes API server (TCP passthrough for mTLS) - # NOTE: Port is dynamic with podman driver - check with: - # ssh indri "kubectl config view --minify -o jsonpath='{.clusters[0].cluster.server}'" + # With qemu2 driver, API server is inside VM at 192.168.105.2:6443 - name: svc:k8s tcp: port: 443 - upstream: tcp://localhost:44491 + upstream: tcp://192.168.105.2:6443 -- 2.50.1 (Apple Git-155) From 5724b61fb44f457f9b63909d8ad46d17befd270a Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Wed, 21 Jan 2026 13:27:27 -0800 Subject: [PATCH 08/16] save some work --- bin/kubectl-credential-1password | 31 -- plans/k8s-migration/P5.1_qemu2_migration.md | 462 ++++++++++---------- 2 files changed, 243 insertions(+), 250 deletions(-) delete mode 100755 bin/kubectl-credential-1password diff --git a/bin/kubectl-credential-1password b/bin/kubectl-credential-1password deleted file mode 100755 index 04f2669..0000000 --- a/bin/kubectl-credential-1password +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash -# kubectl exec credential plugin for 1Password -# Usage: kubectl-credential-1password -# -# Fetches client certificate and key from 1Password and outputs -# ExecCredential JSON for kubectl authentication. 
- -set -euo pipefail - -VAULT_ID="$1" -ITEM_ID="$2" -CERT_FIELD="$3" -KEY_FIELD="$4" - -# Fetch credentials from 1Password (strips surrounding quotes from text fields) -CLIENT_CERT=$(op --vault "$VAULT_ID" item get "$ITEM_ID" --fields "$CERT_FIELD" | sed 's/^"//; s/"$//') -CLIENT_KEY=$(op --vault "$VAULT_ID" item get "$ITEM_ID" --fields "$KEY_FIELD" | sed 's/^"//; s/"$//') - -# Output ExecCredential JSON -# Note: jq is used to properly escape the PEM data for JSON -jq -n \ - --arg cert "$CLIENT_CERT" \ - --arg key "$CLIENT_KEY" \ - '{ - "apiVersion": "client.authentication.k8s.io/v1beta1", - "kind": "ExecCredential", - "status": { - "clientCertificateData": $cert, - "clientKeyData": $key - } - }' diff --git a/plans/k8s-migration/P5.1_qemu2_migration.md b/plans/k8s-migration/P5.1_qemu2_migration.md index d4156c7..63f12e9 100644 --- a/plans/k8s-migration/P5.1_qemu2_migration.md +++ b/plans/k8s-migration/P5.1_qemu2_migration.md @@ -1,8 +1,8 @@ -# Phase 5.1: Migrate Minikube from Podman to QEMU2 Driver +# Phase 5.1: Migrate Minikube from QEMU2 to Docker Driver -**Goal**: Replace the podman driver with qemu2 to enable proper volume mounts (hostPath, NFS, SMB CSI) +**Goal**: Replace the qemu2 driver with docker to fix remote API access and simplify volume mounts -**Status**: Complete (2026-01-21) +**Status**: In Progress (2026-01-21) **Prerequisites**: [Phase 5](P5_devpi.complete.md) complete @@ -10,307 +10,331 @@ ## Background +### Original Problem (Podman → QEMU2) + During Phase 6 (Kiwix/Transmission migration), we discovered that the **podman driver has fundamental limitations** that prevent mounting external volumes: 1. **SMB CSI driver fails** with "Operation not permitted" - the rootless container lacks kernel-level mount capabilities 2. **`minikube mount` fails** - 9p mount gets "permission denied" inside the podman VM 3. 
**hostPath volumes** only work for paths inside the minikube container, not the macOS host -These are documented limitations of the podman driver, which is labeled "experimental" in the [minikube documentation](https://minikube.sigs.k8s.io/docs/drivers/podman/). +We migrated to QEMU2 to get a full VM with kernel capabilities. -### Failed P6 Attempt +### New Problem (QEMU2 → Docker) -Branch `feature/p6-kiwix-transmission` contains the P6 implementation that was blocked by these issues. The manifests are complete and tested, but couldn't mount the torrents volume. +The QEMU2 driver introduced a **new problem**: the Kubernetes API server is inside the VM at `192.168.105.2:6443`, and Tailscale's TCP proxy cannot forward to it properly: -**What was tried:** -- NFS volume mounts - failed due to missing CAP_SYS_ADMIN in podman container -- SMB CSI driver (v1.17.0) - mount fails with EPERM (same root cause) -- `minikube mount /Volumes/torrents:/Volumes/torrents` - 9p mount permission denied -- hostPath PV pointing to `/Volumes/torrents` - path doesn't exist inside minikube container -- Installing cifs-utils in minikube VM - still fails at kernel level +- TCP connections succeed (nc -zv works) +- TLS handshake times out +- Root cause unknown, but likely related to Tailscale serve's handling of non-localhost upstreams -All of these failures trace back to the same root cause: the podman driver runs minikube in a rootless container that lacks the kernel capabilities required for filesystem mounts. +Additionally, the volume mount solution with QEMU2 was complex: +- Required NFS mount from sifaka → indri +- Then `minikube mount` to pass through to VM +- Two LaunchAgents/LaunchDaemons for persistence +- macOS GUI approval required for network access -### Why QEMU2? +### Why Docker? 
-Multiple sources recommend QEMU2 as the best driver for Apple Silicon Macs: +The **docker driver** solves both problems: -> "Qemu emulator is the best option to run a Kubernetes Cluster using minikube on MAC arm64-based systems without any issues." -> — [DevOpsCube](https://devopscube.com/minikube-mac/) +1. **API Server on localhost**: Docker Desktop handles port forwarding from container to localhost automatically, so `tailscale serve --tcp=443 tcp://localhost:PORT` will work (like podman did) -QEMU2 creates an actual VM (not a container), which has: -- Full kernel capabilities for mounts -- Proper 9p/virtio filesystem support -- Native NFS client support +2. **Simpler volume mounts**: Docker Desktop has built-in macOS file sharing. Paths shared with Docker are accessible inside containers, and minikube (running in Docker) can use those paths via hostPath. + +3. **Official Tailscale recommendation**: Tailscale's own [Kubernetes guide](https://tailscale.com/learn/managing-access-to-kubernetes-with-tailscale) uses minikube with the docker driver. --- -## Prerequisites (Manual Steps) +## Prerequisites -### Create Synology User for Kubernetes Storage Access +### 1. Install Docker Desktop (Manual - Before Ansible) -Create a dedicated Synology user for k8s NFS/SMB access (do not use personal account): +Docker Desktop requires GUI setup, so install manually first: -On Synology DSM (Control Panel → User & Group): -1. Create new user: `k8s-storage` - - Set a strong password - - No admin privileges needed - - Deny access to all applications (only needs file services) -2. Set permissions on the `torrents` share: - - Give `k8s-storage` user Read/Write access -3. 
Store credentials in 1Password: - - Vault: `vg6xf6vvfmoh5hqjjhlhbeoaie` (blumeops vault) - - Item name: `synology-k8s-storage` - - Fields: `username` (k8s-storage), `password` +```bash +# On indri: +brew install --cask docker-desktop + +# Then launch Docker Desktop from /Applications +# Complete the setup wizard (accept license, skip tutorial) +# Wait for Docker to be "Running" (green icon in menu bar) + +# Verify: +docker version +docker run hello-world +``` + +**File Sharing Configuration** (in Docker Desktop → Settings → Resources → File sharing): +- Ensure `/Volumes` is shared (for future NFS mounts from sifaka) +- Or add specific paths as needed for P6 + +### 2. Stop Current QEMU2 Minikube + +```bash +# On indri: +minikube stop +minikube delete + +# Verify QEMU resources are cleaned up +ps aux | grep qemu +``` --- ## Plan -### 1. Export Current State +### 1. Update Ansible Role for Docker Driver -Before destroying the cluster, capture the current state: +**Changes to `ansible/roles/minikube/defaults/main.yml`:** -```bash -# List all ArgoCD apps and their sync status -argocd app list +```yaml +# Change from: +minikube_driver: qemu2 +minikube_network: socket_vmnet +minikube_container_runtime: containerd -# Backup any runtime state that matters (should be minimal - everything is in git) -kubectl --context=minikube-indri get all --all-namespaces -o yaml > /tmp/k8s-backup.yaml +# To: +minikube_driver: docker +minikube_container_runtime: docker # or containerd, both work ``` -### 2. 
Stop and Delete Podman Minikube +**Remove from defaults:** +- `minikube_network` (not needed for docker driver) -```bash -# Stop the cluster -minikube stop +**Changes to `ansible/roles/minikube/tasks/main.yml`:** +- Remove qemu installation +- Remove socket_vmnet installation and service management +- Remove NFS mount point creation +- Remove NFS LaunchDaemon installation +- Remove minikube mount LaunchAgent installation +- Keep containerd registry mirror config (adapting for docker if needed) -# Delete the cluster and all data -minikube delete +**Remove files from `ansible/roles/minikube/files/`:** +- `com.blumeops.nfs-torrents.plist` +- `com.blumeops.minikube-mount.plist` -# Verify podman VM is cleaned up -podman machine list +**Changes to `ansible/roles/minikube/handlers/main.yml`:** +- Remove `Load NFS mount LaunchDaemon` +- Remove `Load minikube mount LaunchAgent` + +**Add to Brewfile:** +```ruby +cask "docker" # Docker Desktop ``` -### 3. Update Ansible Roles for QEMU2 +### 2. Update Tailscale Serve Configuration -The installation must be orchestrated via ansible, following the existing patterns for `podman` and `minikube` roles. +**Changes to `ansible/roles/tailscale_serve/defaults/main.yml`:** -**Changes needed:** +```yaml +# Change svc:k8s upstream from VM IP back to localhost: +- name: svc:k8s + tcp: + port: 443 + upstream: tcp://localhost:PORT # PORT will be dynamic, see below +``` -1. **Update `ansible/roles/minikube/` role:** - - Change driver from `podman` to `qemu2` - - Add QEMU as a dependency (via Brewfile or role) - - Optionally add socket_vmnet for full networking support - - Update any driver-specific configuration +**Note on API server port**: With the docker driver, the API server port is dynamic (assigned by minikube). We need to either: +- Use `--apiserver-port=6443` to fix it +- Or query and update the config after cluster creation -2. **Update `Brewfile`:** - ```ruby - brew "qemu" - # Optional: brew "socket_vmnet" - ``` - -3. 
**Update minikube start command in role:** - ```bash - minikube start \ - --driver=qemu2 \ - --cpus=4 \ - --memory=8192 \ - --disk-size=50g \ - --container-runtime=containerd \ - --kubernetes-version=stable - ``` - -4. **Remove or update podman role** (may still be useful for container builds) - -### 4. Run Ansible to Create QEMU2 Cluster +### 3. Create Docker Minikube Cluster ```bash -# Run the updated minikube role -mise run provision-indri -- --tags minikube +# On indri (after Docker Desktop is running): +minikube start \ + --driver=docker \ + --cpus=6 \ + --memory=12288 \ + --disk-size=200g \ + --apiserver-names=k8s.tail8d86e.ts.net,indri \ + --apiserver-port=6443 \ + --listen-address=0.0.0.0 -# Verify cluster is running +# Verify cluster minikube status kubectl get nodes ``` -### 5. Configure Host Path Access - -With QEMU2, we need to either: - -**Option A: Use `minikube mount` (9p)** -```bash -# Start persistent mount (run in background or via launchd) -minikube mount /Volumes/torrents:/Volumes/torrents & -``` - -**Option B: Use NFS export from macOS** -```bash -# Add NFS export on macOS -echo "/Volumes/torrents -alldirs -mapall=$(id -u):$(id -g) -network 192.168.0.0 -mask 255.255.0.0" | sudo tee -a /etc/exports -sudo nfsd restart - -# In k8s, use NFS volume type directly -``` - -### 6. Test Volume Mount with Test Pod - -Create a test pod that mounts the torrents volume: - -```yaml -apiVersion: v1 -kind: Pod -metadata: - name: volume-test - namespace: default -spec: - containers: - - name: test - image: busybox - command: ["sh", "-c", "ls -la /data && sleep 3600"] - volumeMounts: - - name: torrents - mountPath: /data - volumes: - - name: torrents - hostPath: - path: /Volumes/torrents - type: Directory -``` - -Verify: -```bash -kubectl apply -f volume-test.yaml -kubectl logs volume-test -kubectl exec volume-test -- ls -la /data -``` - -### 7. Redeploy ArgoCD and Existing Apps +### 4. 
Verify API Server is on Localhost ```bash -# Re-add ArgoCD +# Check what port the API server is on +kubectl config view --minify -o jsonpath="{.clusters[0].cluster.server}" +# Should show https://127.0.0.1:PORT or similar + +# Verify local access works +curl -k https://localhost:6443/healthz +# Should return "ok" +``` + +### 5. Update 1Password Credentials + +After cluster recreation, update the credentials in 1Password: + +```bash +# On indri, get the new certificates: +cat ~/.minikube/profiles/minikube/client.crt +cat ~/.minikube/profiles/minikube/client.key +cat ~/.minikube/ca.crt +``` + +Update in 1Password (vault: `vg6xf6vvfmoh5hqjjhlhbeoaie`, item: `3jo4f2hnzvwfmamudfsbbbec7e`). + +### 6. Update Kubeconfig on Gilbert + +```bash +# Fetch new CA cert from 1Password +op --vault vg6xf6vvfmoh5hqjjhlhbeoaie item get 3jo4f2hnzvwfmamudfsbbbec7e --fields ca-cert | sed 's/^"//; s/"$//' > ~/.kube/minikube-indri/ca.crt +``` + +### 7. Configure Tailscale Serve for K8s + +```bash +# On indri: +tailscale serve --service="svc:k8s" --tcp=443 tcp://localhost:6443 +``` + +### 8. Verify Remote Access + +```bash +# From gilbert: +curl -k --connect-timeout 5 https://k8s.tail8d86e.ts.net/healthz +# Should return "ok" + +kubectl --context=minikube-indri get nodes +# Should show the minikube node +``` + +### 9. 
Redeploy ArgoCD and Apps + +Since this is a cluster recreation, we need to re-bootstrap: + +```bash +# On indri - apply secrets first +op inject -i argocd/manifests/tailscale-operator/secret.yaml.tpl | kubectl apply -f - + +# Create repo secret for ArgoCD +PRIV_KEY=$(op read "op://vg6xf6vvfmoh5hqjjhlhbeoaie/csjncynh6htjvnh2l2da65y32q/private key?ssh-format=openssh")$'\n' kubectl create namespace argocd -kubectl apply -n argocd -f https://raw.githubusercontent.com/argoproj/argo-cd/stable/manifests/install.yaml +kubectl create secret generic repo-forge -n argocd \ + --from-literal=type=git \ + --from-literal=url='ssh://forgejo@indri.tail8d86e.ts.net:2200/eblume/blumeops.git' \ + --from-literal=insecure=true \ + --from-literal=sshPrivateKey="$PRIV_KEY" +kubectl label secret repo-forge -n argocd argocd.argoproj.io/secret-type=repository -# Wait for ArgoCD to be ready +# Bootstrap operators +kubectl create namespace tailscale +kubectl apply -k argocd/manifests/tailscale-operator/ +kubectl apply -k argocd/manifests/argocd/ + +# Wait for ArgoCD kubectl wait --for=condition=available deployment/argocd-server -n argocd --timeout=300s -# Re-configure ArgoCD (repo credentials, etc.) -# ... follow P1 setup steps ... - -# Sync all apps +# Login and sync apps +argocd login argocd.tail8d86e.ts.net --username admin --grpc-web argocd app sync apps +argocd app sync tailscale-operator +argocd app sync cloudnative-pg +argocd app sync blumeops-pg +argocd app sync grafana +argocd app sync grafana-config +argocd app sync miniflux +argocd app sync devpi ``` -### 8. Verify All Services +### 10. Verify All Services ```bash -# Run health check mise run indri-services-check - -# Verify each k8s service argocd app list kubectl get pods --all-namespaces ``` -### 9. 
Clean Up Test Pod +--- -```bash -kubectl delete pod volume-test -``` +## Volume Mounts for P6 (Kiwix/Transmission) + +With the docker driver, volume mounts work differently than QEMU2: + +**Option A: Docker Desktop File Sharing + hostPath** +1. Mount sifaka NFS share on indri: `/Volumes/torrents` +2. Add `/Volumes/torrents` to Docker Desktop file sharing +3. Pods use hostPath pointing to that path + +**Option B: NFS directly from pods** +- Docker containers can make NFS mounts (unlike podman's rootless containers) +- May need to test if sifaka allows connections from the Docker network + +This will be fully tested in Phase 6. + +--- + +## Cleanup + +After successful migration: + +1. **Remove QEMU2 artifacts:** + ```bash + brew uninstall qemu socket_vmnet + ``` + +2. **Remove podman if no longer needed:** + ```bash + podman machine stop + podman machine rm + brew uninstall podman + ``` --- ## Verification Checklist -- [ ] Podman minikube deleted -- [ ] QEMU2 minikube running -- [ ] `minikube mount` or NFS working -- [ ] Test pod can read `/Volumes/torrents` +- [ ] Docker Desktop installed and running on indri +- [ ] QEMU2 minikube deleted +- [ ] Docker minikube running +- [ ] API server accessible on localhost:6443 +- [ ] Tailscale serve configured for svc:k8s → localhost:6443 +- [ ] Remote kubectl access working from gilbert - [ ] ArgoCD redeployed and synced - [ ] All existing apps healthy (grafana, miniflux, devpi, etc.) - [ ] PostgreSQL cluster healthy -- [ ] Test pod deleted -- [ ] `mise run indri-services-check` passes (except intentionally offline services) +- [ ] `mise run indri-services-check` passes --- ## Rollback Plan -If QEMU2 doesn't work: +If Docker driver doesn't work: -1. Delete QEMU2 cluster: `minikube delete` -2. Recreate podman cluster following P0/P1 steps -3. Redeploy apps from git - -All state is in git, so cluster recreation is straightforward. +1. Delete Docker minikube: `minikube delete` +2. 
Recreate QEMU2 cluster (restore old ansible config from git) +3. Accept the Tailscale TCP forwarding limitation and use SSH tunnel for remote kubectl --- ## Notes -- The QEMU2 VM will use more resources than podman (actual VM vs container) -- First boot may be slower due to VM initialization -- socket_vmnet provides better networking but requires sudo setup -- Consider creating a LaunchAgent for `minikube mount` if using that approach +- Docker Desktop has resource overhead but provides better macOS integration +- The docker driver is more widely used and tested than qemu2 +- File sharing permissions may need adjustment in Docker Desktop settings +- First cluster start may be slow as Docker pulls the minikube base image ## Implementation Notes (2026-01-21) -### What Actually Worked +### QEMU2 Cleanup Done -**Volume mounting solution**: NFS mount on indri (host) + `minikube mount` to pass through to VM +Removed from indri: +- `/Library/LaunchDaemons/com.blumeops.nfs-torrents.plist` - NFS mount daemon +- `~/Library/LaunchAgents/com.blumeops.minikube-mount.plist` - minikube mount agent +- Unmounted `/Volumes/torrents-nfs` NFS mount +- Removed `/Volumes/torrents-nfs` mount point -1. Mount sifaka's torrents share on indri via NFS: `sudo mount -t nfs sifaka:/volume1/torrents /Volumes/torrents-nfs` -2. Run `minikube mount /Volumes/torrents-nfs:/mnt/torrents` from indri console (GUI session required due to macOS security) -3. Pods can access `/mnt/torrents` via hostPath +### Previous QEMU2 Issues -**Why NFS from inside VM didn't work**: Despite allowing 192.168.105.0/24 in Synology NFS settings, the VM got "access denied". Root cause unknown - may be Synology NFS quirk. - -**Why SMB didn't work**: The minikube containerd kernel doesn't include the CIFS module. 
- -### Zot Registry Mirror (Implemented) - -The ansible role now configures containerd to redirect `registry.tail8d86e.ts.net` to `host.minikube.internal:5050`: -- Adds hosts file entry in VM -- Creates containerd registry mirror config at `/etc/containerd/certs.d/registry.tail8d86e.ts.net/hosts.toml` - -### Passwordless Sudo on Indri - -Configured passwordless sudo for `erichblume` user to allow ansible `become: true` tasks to run without `-K` flag: - -```bash -echo "erichblume ALL=(ALL) NOPASSWD: ALL" | sudo tee /etc/sudoers.d/erichblume -``` - -This is acceptable given the security model - tailnet access is the trust boundary. - -### macOS Network Permission - -The first time `minikube mount` runs, macOS will show a GUI popup asking to allow network access. This must be approved from the indri console (not SSH). Once approved, subsequent runs won't prompt. - -### Manual Steps Still Required - -These steps cannot be fully automated via ansible and must be done manually: - -1. **socket_vmnet service (once per reboot)**: - ```bash - # On indri console: - sudo brew services start socket_vmnet - ``` - -2. **NFS mount on indri (once per reboot)**: - ```bash - # On indri console: - sudo mount -t nfs sifaka:/volume1/torrents /Volumes/torrents-nfs - ``` - -3. **minikube mount (must run in GUI session)**: - ```bash - # On indri console (not SSH - requires GUI session for macOS security): - minikube mount /Volumes/torrents-nfs:/mnt/torrents - # Keep this terminal open - the mount dies if process exits - ``` - -### TODO: LaunchAgent for Persistent Mount - -Create a LaunchAgent to run `minikube mount` at login. Challenge: must run in GUI session context for macOS security model. 
+The QEMU2 migration partially worked but had a critical issue: +- Volume mounts worked via NFS → indri → minikube mount chain +- But Tailscale TCP proxy to VM IP (192.168.105.2:6443) failed with TLS timeout +- Root cause unknown - TCP connected but TLS handshake never completed -- 2.50.1 (Apple Git-155) From 201c90b27e798252da47024451f85ff030af3c8d Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Wed, 21 Jan 2026 13:44:30 -0800 Subject: [PATCH 09/16] Add mise task for minikube-indri kubectl config Creates reusable script that fetches certificates from indri and sets up kubeconfig at ~/.kube/minikube-indri/config.yml for remote kubectl access. Part of P5.1 migration to docker driver. Co-Authored-By: Claude Opus 4.5 --- .../ensure-minikube-indri-kubectl-config | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100755 mise-tasks/ensure-minikube-indri-kubectl-config diff --git a/mise-tasks/ensure-minikube-indri-kubectl-config b/mise-tasks/ensure-minikube-indri-kubectl-config new file mode 100755 index 0000000..131bc4d --- /dev/null +++ b/mise-tasks/ensure-minikube-indri-kubectl-config @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +#MISE description="Ensure kubectl config for minikube-indri is set up on this workstation" + +set -euo pipefail + +CONFIG_DIR="$HOME/.kube/minikube-indri" +CONFIG_FILE="$CONFIG_DIR/config.yml" + +echo "Ensuring minikube-indri kubectl config..." + +# Create directory if needed +mkdir -p "$CONFIG_DIR" + +# Fetch certificates from indri +echo "Fetching certificates from indri..." 
+CA_CERT=$(ssh indri 'cat ~/.minikube/ca.crt') +CLIENT_CERT=$(ssh indri 'cat ~/.minikube/profiles/minikube/client.crt') +CLIENT_KEY=$(ssh indri 'cat ~/.minikube/profiles/minikube/client.key') + +# Write certificate files +echo "$CA_CERT" > "$CONFIG_DIR/ca.crt" +echo "$CLIENT_CERT" > "$CONFIG_DIR/client.crt" +echo "$CLIENT_KEY" > "$CONFIG_DIR/client.key" +chmod 600 "$CONFIG_DIR/client.key" + +# Write kubeconfig +cat > "$CONFIG_FILE" << EOF +apiVersion: v1 +kind: Config +clusters: +- cluster: + certificate-authority: $CONFIG_DIR/ca.crt + server: https://k8s.tail8d86e.ts.net + name: minikube-indri +contexts: +- context: + cluster: minikube-indri + user: minikube-indri + name: minikube-indri +current-context: minikube-indri +users: +- name: minikube-indri + user: + client-certificate: $CONFIG_DIR/client.crt + client-key: $CONFIG_DIR/client.key +EOF + +echo "Config written to $CONFIG_FILE" + +# Warn if KUBECONFIG doesn't include this file +if [[ -z "${KUBECONFIG:-}" ]] || [[ ":$KUBECONFIG:" != *":$CONFIG_FILE:"* ]]; then + echo "" + echo "WARNING: KUBECONFIG does not include $CONFIG_FILE" + echo "Add this to your shell config:" + echo " export KUBECONFIG=\"\$KUBECONFIG:$CONFIG_FILE\"" +fi + +echo "" +echo "Test with: kubectl --context=minikube-indri get nodes" -- 2.50.1 (Apple Git-155) From 9fac4439b1daff4e38f5f82914460f87ef0f0e02 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Wed, 21 Jan 2026 13:52:52 -0800 Subject: [PATCH 10/16] Migrate minikube ansible role from qemu2 to docker driver - Change driver from qemu2 to docker - Remove socket_vmnet and qemu dependencies - Remove NFS mount and minikube mount LaunchAgent/LaunchDaemon - Remove old podman zot-mirror.conf - Update containerd registry mirror config for docker driver - Uses host.minikube.internal:5050 to reach zot - Configures pull-through cache for docker.io, ghcr.io, quay.io - Add dynamic tailscale serve configuration for k8s API (port is dynamic with docker driver, not fixed at 6443) - Remove svc:k8s from 
tailscale_serve defaults (minikube role handles it) Co-Authored-By: Claude Opus 4.5 --- ansible/roles/minikube/defaults/main.yml | 11 +- .../files/com.blumeops.minikube-mount.plist | 41 ---- .../files/com.blumeops.nfs-torrents.plist | 24 -- ansible/roles/minikube/files/zot-mirror.conf | 43 ---- ansible/roles/minikube/handlers/main.yml | 13 - ansible/roles/minikube/tasks/main.yml | 230 +++++++++++------- .../roles/tailscale_serve/defaults/main.yml | 8 +- 7 files changed, 149 insertions(+), 221 deletions(-) delete mode 100644 ansible/roles/minikube/files/com.blumeops.minikube-mount.plist delete mode 100644 ansible/roles/minikube/files/com.blumeops.nfs-torrents.plist delete mode 100644 ansible/roles/minikube/files/zot-mirror.conf diff --git a/ansible/roles/minikube/defaults/main.yml b/ansible/roles/minikube/defaults/main.yml index e753f38..2558c91 100644 --- a/ansible/roles/minikube/defaults/main.yml +++ b/ansible/roles/minikube/defaults/main.yml @@ -1,15 +1,16 @@ --- # Minikube cluster configuration +# Uses docker driver - requires Docker Desktop to be installed and running +# with at least 12GB memory allocated in Docker Desktop settings minikube_cpus: 6 -minikube_memory: 12288 +minikube_memory: 11264 # Leave ~1GB headroom for Docker Desktop overhead minikube_disk_size: "200g" -minikube_driver: qemu2 -minikube_network: socket_vmnet -minikube_container_runtime: containerd +minikube_driver: docker +minikube_container_runtime: docker # Remote access configuration # These allow kubectl from other machines (e.g., gilbert) to connect -# k8s.tail8d86e.ts.net is exposed via Tailscale service (TCP passthrough) +# k8s.tail8d86e.ts.net is exposed via Tailscale service (TCP passthrough to localhost) minikube_apiserver_names: - k8s.tail8d86e.ts.net - indri diff --git a/ansible/roles/minikube/files/com.blumeops.minikube-mount.plist b/ansible/roles/minikube/files/com.blumeops.minikube-mount.plist deleted file mode 100644 index 2313c52..0000000 --- 
a/ansible/roles/minikube/files/com.blumeops.minikube-mount.plist +++ /dev/null @@ -1,41 +0,0 @@ - - - - - Label - com.blumeops.minikube-mount - ProgramArguments - - /bin/bash - -c - -# Wait for minikube to be running -for i in {1..60}; do - if /opt/homebrew/bin/minikube status | grep -q "Running"; then - break - fi - sleep 5 -done - -# Wait for NFS mount to be available -for i in {1..30}; do - if mount | grep -q "/Volumes/torrents-nfs"; then - break - fi - sleep 2 -done - -# Start the mount (this blocks until killed) -exec /opt/homebrew/bin/minikube mount /Volumes/torrents-nfs:/mnt/torrents - - - RunAtLoad - - KeepAlive - - StandardErrorPath - /tmp/minikube-mount.err - StandardOutPath - /tmp/minikube-mount.log - - diff --git a/ansible/roles/minikube/files/com.blumeops.nfs-torrents.plist b/ansible/roles/minikube/files/com.blumeops.nfs-torrents.plist deleted file mode 100644 index 1cec7a6..0000000 --- a/ansible/roles/minikube/files/com.blumeops.nfs-torrents.plist +++ /dev/null @@ -1,24 +0,0 @@ - - - - - Label - com.blumeops.nfs-torrents - ProgramArguments - - /sbin/mount - -t - nfs - -o - resvport,rw - sifaka:/volume1/torrents - /Volumes/torrents-nfs - - RunAtLoad - - StandardErrorPath - /tmp/nfs-torrents.err - StandardOutPath - /tmp/nfs-torrents.log - - diff --git a/ansible/roles/minikube/files/zot-mirror.conf b/ansible/roles/minikube/files/zot-mirror.conf deleted file mode 100644 index df9bc11..0000000 --- a/ansible/roles/minikube/files/zot-mirror.conf +++ /dev/null @@ -1,43 +0,0 @@ -# Zot pull-through cache on indri -# Uses host.containers.internal which is stable across restarts -# Applied by ansible minikube role - -# Direct access to Zot for private images (blumeops/*) -[[registry]] -prefix = "host.containers.internal:5050" -location = "host.containers.internal:5050" -insecure = true - -# Tailscale hostname for Zot - redirects to local access -# Allows manifests to use registry.tail8d86e.ts.net which is cleaner -[[registry]] -prefix = 
"registry.tail8d86e.ts.net" -location = "registry.tail8d86e.ts.net" - -[[registry.mirror]] -location = "host.containers.internal:5050" -insecure = true - -[[registry]] -prefix = "docker.io" -location = "docker.io" - -[[registry.mirror]] -location = "host.containers.internal:5050/docker.io" -insecure = true - -[[registry]] -prefix = "ghcr.io" -location = "ghcr.io" - -[[registry.mirror]] -location = "host.containers.internal:5050/ghcr.io" -insecure = true - -[[registry]] -prefix = "quay.io" -location = "quay.io" - -[[registry.mirror]] -location = "host.containers.internal:5050/quay.io" -insecure = true diff --git a/ansible/roles/minikube/handlers/main.yml b/ansible/roles/minikube/handlers/main.yml index 7d62a8b..44ad747 100644 --- a/ansible/roles/minikube/handlers/main.yml +++ b/ansible/roles/minikube/handlers/main.yml @@ -12,16 +12,3 @@ ansible.builtin.command: cmd: minikube ssh --native-ssh=false "sudo systemctl restart containerd" changed_when: true - -- name: Load NFS mount LaunchDaemon - ansible.builtin.command: - cmd: launchctl load /Library/LaunchDaemons/com.blumeops.nfs-torrents.plist - become: true - failed_when: false - changed_when: true - -- name: Load minikube mount LaunchAgent - ansible.builtin.command: - cmd: launchctl load {{ ansible_facts['env']['HOME'] }}/Library/LaunchAgents/com.blumeops.minikube-mount.plist - failed_when: false - changed_when: true diff --git a/ansible/roles/minikube/tasks/main.yml b/ansible/roles/minikube/tasks/main.yml index a471d82..9e9fdd3 100644 --- a/ansible/roles/minikube/tasks/main.yml +++ b/ansible/roles/minikube/tasks/main.yml @@ -1,41 +1,20 @@ --- # Minikube installation and cluster setup for indri -# Uses qemu2 driver for full VM with kernel mount capabilities (NFS, SMB, etc.) 
-# Requires socket_vmnet for proper networking (minikube service/tunnel commands) +# Uses docker driver - requires Docker Desktop to be installed manually first +# (Docker Desktop requires GUI setup, so it's not automated in this role) +# +# Prerequisites: +# 1. Install Docker Desktop: brew install --cask docker +# 2. Launch Docker Desktop and complete setup wizard +# 3. Configure Docker Desktop with at least 12GB memory # # NOTE: minikube start may have issues when run via SSH. # If cluster fails to start, manually run on indri: -# minikube start --driver=qemu2 --network=socket_vmnet --container-runtime=containerd \ -# --cpus=6 --memory=12288 --disk-size=200g \ +# minikube start --driver=docker --container-runtime=docker \ +# --cpus=6 --memory=11264 --disk-size=200g \ # --apiserver-names=k8s.tail8d86e.ts.net --apiserver-names=indri \ # --apiserver-port=6443 --listen-address=0.0.0.0 -- name: Install qemu via homebrew (required for qemu2 driver) - community.general.homebrew: - name: qemu - state: present - -- name: Install socket_vmnet via homebrew (required for qemu2 networking) - community.general.homebrew: - name: socket_vmnet - state: present - -- name: Check if socket_vmnet process is running - ansible.builtin.command: - cmd: pgrep socket_vmnet - register: minikube_socket_vmnet_status - changed_when: false - failed_when: false - -- name: Start socket_vmnet service - ansible.builtin.command: - cmd: brew services start socket_vmnet - become: true - register: minikube_socket_vmnet_start - changed_when: "'Successfully started' in minikube_socket_vmnet_start.stdout" - failed_when: false - when: minikube_socket_vmnet_status.rc != 0 - - name: Install minikube via homebrew community.general.homebrew: name: minikube @@ -46,6 +25,18 @@ name: kubectl state: present +- name: Check if Docker is running + ansible.builtin.command: + cmd: docker info + register: minikube_docker_status + changed_when: false + failed_when: false + +- name: Warn if Docker is not running + 
ansible.builtin.debug: + msg: "WARNING: Docker does not appear to be running. Please start Docker Desktop manually." + when: minikube_docker_status.rc != 0 + - name: Check if minikube cluster exists ansible.builtin.command: cmd: minikube status --format={% raw %}'{{.Host}}'{% endraw %} @@ -58,7 +49,6 @@ cmd: > minikube start --driver={{ minikube_driver }} - --network={{ minikube_network }} --container-runtime={{ minikube_container_runtime }} --cpus={{ minikube_cpus }} --memory={{ minikube_memory }} @@ -70,8 +60,10 @@ --listen-address={{ minikube_listen_address }} register: minikube_start changed_when: minikube_start.rc == 0 - failed_when: false # Don't fail - may need manual intervention like podman - when: minikube_status.rc != 0 or 'Running' not in minikube_status.stdout + failed_when: false # Don't fail - may need manual intervention + when: + - minikube_docker_status.rc == 0 + - minikube_status.rc != 0 or 'Running' not in minikube_status.stdout - name: Check minikube status after start attempt ansible.builtin.command: @@ -85,84 +77,146 @@ msg: "WARNING: minikube may not have started properly. Run 'minikube start' manually on indri if needed. 
Status: {{ minikube_final_status.stdout | default('unknown') }}" when: minikube_final_status.rc != 0 or 'Running' not in minikube_final_status.stdout -# Configure VM to access zot registry on host -# The VM can't resolve Tailscale hostnames, so we add a hosts entry -# and configure containerd to use the local zot instance -- name: Add registry hostname to VM hosts file - ansible.builtin.command: - cmd: minikube ssh --native-ssh=false "grep -q 'registry.tail8d86e.ts.net' /etc/hosts || echo '192.168.105.1 registry.tail8d86e.ts.net' | sudo tee -a /etc/hosts" - register: minikube_hosts_entry - changed_when: "'registry.tail8d86e.ts.net' in minikube_hosts_entry.stdout" - when: minikube_final_status.rc == 0 and 'Running' in minikube_final_status.stdout +# Configure containerd to use zot registry as pull-through cache +# With docker driver, use host.minikube.internal to reach the host +# Zot runs on indri:5050 and caches images from docker.io, ghcr.io, quay.io -- name: Create containerd registry mirror directory +- name: Create containerd registry mirror directories ansible.builtin.command: - cmd: minikube ssh --native-ssh=false "sudo mkdir -p /etc/containerd/certs.d/registry.tail8d86e.ts.net" - register: minikube_registry_dir + cmd: minikube ssh --native-ssh=false "sudo mkdir -p /etc/containerd/certs.d/{{ item }}" + loop: + - registry.tail8d86e.ts.net + - docker.io + - ghcr.io + - quay.io changed_when: false when: minikube_final_status.rc == 0 and 'Running' in minikube_final_status.stdout -- name: Check containerd registry mirror config +# Private registry (registry.tail8d86e.ts.net) - direct to zot +- name: Check registry.tail8d86e.ts.net config ansible.builtin.command: cmd: minikube ssh --native-ssh=false "cat /etc/containerd/certs.d/registry.tail8d86e.ts.net/hosts.toml 2>/dev/null || echo ''" - register: minikube_registry_config_current + register: minikube_registry_config changed_when: false when: minikube_final_status.rc == 0 and 'Running' in 
minikube_final_status.stdout -- name: Configure containerd registry mirror for zot +- name: Configure registry.tail8d86e.ts.net mirror ansible.builtin.command: cmd: | minikube ssh --native-ssh=false 'echo "server = \"http://host.minikube.internal:5050\" [host.\"http://host.minikube.internal:5050\"] - capabilities = [\"pull\", \"resolve\"] + capabilities = [\"pull\", \"resolve\", \"push\"] skip_verify = true" | sudo tee /etc/containerd/certs.d/registry.tail8d86e.ts.net/hosts.toml' - register: minikube_registry_config changed_when: true when: - minikube_final_status.rc == 0 and 'Running' in minikube_final_status.stdout - - "'host.minikube.internal:5050' not in minikube_registry_config_current.stdout" + - "'host.minikube.internal:5050' not in minikube_registry_config.stdout" notify: Restart containerd in minikube -# Set up persistent NFS mount from sifaka and minikube mount passthrough -# NFS mount uses LaunchDaemon (runs as root at boot) -# Minikube mount uses LaunchAgent (runs in user GUI session at login) -# -# NOTE: Tasks with become:true require passwordless sudo on indri -# (configured via /etc/sudoers.d/erichblume) +# Docker Hub (docker.io) - zot pull-through cache +- name: Check docker.io config + ansible.builtin.command: + cmd: minikube ssh --native-ssh=false "cat /etc/containerd/certs.d/docker.io/hosts.toml 2>/dev/null || echo ''" + register: minikube_dockerio_config + changed_when: false + when: minikube_final_status.rc == 0 and 'Running' in minikube_final_status.stdout -- name: Check if NFS mount point exists - ansible.builtin.stat: - path: /Volumes/torrents-nfs - register: minikube_nfs_mount_point +- name: Configure docker.io mirror through zot + ansible.builtin.command: + cmd: | + minikube ssh --native-ssh=false 'echo "server = \"https://registry-1.docker.io\" -- name: Create NFS mount point - ansible.builtin.file: - path: /Volumes/torrents-nfs - state: directory - mode: "0755" - become: true - when: not minikube_nfs_mount_point.stat.exists + 
[host.\"http://host.minikube.internal:5050\"] + capabilities = [\"pull\", \"resolve\"] + skip_verify = true" | sudo tee /etc/containerd/certs.d/docker.io/hosts.toml' + changed_when: true + when: + - minikube_final_status.rc == 0 and 'Running' in minikube_final_status.stdout + - "'host.minikube.internal:5050' not in minikube_dockerio_config.stdout" + notify: Restart containerd in minikube -- name: Check if NFS LaunchDaemon is installed - ansible.builtin.stat: - path: /Library/LaunchDaemons/com.blumeops.nfs-torrents.plist - register: minikube_nfs_launchdaemon +# GitHub Container Registry (ghcr.io) - zot pull-through cache +- name: Check ghcr.io config + ansible.builtin.command: + cmd: minikube ssh --native-ssh=false "cat /etc/containerd/certs.d/ghcr.io/hosts.toml 2>/dev/null || echo ''" + register: minikube_ghcr_config + changed_when: false + when: minikube_final_status.rc == 0 and 'Running' in minikube_final_status.stdout -- name: Install NFS mount LaunchDaemon - ansible.builtin.copy: - src: com.blumeops.nfs-torrents.plist - dest: /Library/LaunchDaemons/com.blumeops.nfs-torrents.plist - owner: root - group: wheel - mode: "0644" - become: true - notify: Load NFS mount LaunchDaemon - when: not minikube_nfs_launchdaemon.stat.exists +- name: Configure ghcr.io mirror through zot + ansible.builtin.command: + cmd: | + minikube ssh --native-ssh=false 'echo "server = \"https://ghcr.io\" -- name: Install minikube mount LaunchAgent - ansible.builtin.copy: - src: com.blumeops.minikube-mount.plist - dest: "{{ ansible_facts['env']['HOME'] }}/Library/LaunchAgents/com.blumeops.minikube-mount.plist" - mode: "0644" - notify: Load minikube mount LaunchAgent + [host.\"http://host.minikube.internal:5050\"] + capabilities = [\"pull\", \"resolve\"] + skip_verify = true" | sudo tee /etc/containerd/certs.d/ghcr.io/hosts.toml' + changed_when: true + when: + - minikube_final_status.rc == 0 and 'Running' in minikube_final_status.stdout + - "'host.minikube.internal:5050' not in 
minikube_ghcr_config.stdout" + notify: Restart containerd in minikube + +# Quay.io - zot pull-through cache +- name: Check quay.io config + ansible.builtin.command: + cmd: minikube ssh --native-ssh=false "cat /etc/containerd/certs.d/quay.io/hosts.toml 2>/dev/null || echo ''" + register: minikube_quay_config + changed_when: false + when: minikube_final_status.rc == 0 and 'Running' in minikube_final_status.stdout + +- name: Configure quay.io mirror through zot + ansible.builtin.command: + cmd: | + minikube ssh --native-ssh=false 'echo "server = \"https://quay.io\" + + [host.\"http://host.minikube.internal:5050\"] + capabilities = [\"pull\", \"resolve\"] + skip_verify = true" | sudo tee /etc/containerd/certs.d/quay.io/hosts.toml' + changed_when: true + when: + - minikube_final_status.rc == 0 and 'Running' in minikube_final_status.stdout + - "'host.minikube.internal:5050' not in minikube_quay_config.stdout" + notify: Restart containerd in minikube + +# Configure Tailscale serve for k8s API access +# With docker driver, the API server port is dynamic (not fixed at 6443) +# We query the current port and configure tailscale serve accordingly +- name: Get minikube API server URL + ansible.builtin.command: + cmd: kubectl config view --minify -o jsonpath="{.clusters[0].cluster.server}" + register: minikube_api_url + changed_when: false + when: minikube_final_status.rc == 0 and 'Running' in minikube_final_status.stdout + +- name: Extract API server port from URL + ansible.builtin.set_fact: + minikube_api_port: "{{ minikube_api_url.stdout | regex_search(':([0-9]+)$', '\\1') | first }}" + when: + - minikube_final_status.rc == 0 and 'Running' in minikube_final_status.stdout + - minikube_api_url.stdout is defined + +- name: Check current tailscale serve config for k8s + ansible.builtin.command: + cmd: tailscale serve status --json + register: minikube_tailscale_serve_status + changed_when: false + when: minikube_api_port is defined + +- name: Parse tailscale serve k8s config + 
ansible.builtin.set_fact: + minikube_tailscale_k8s_tcp: "{{ ((minikube_tailscale_serve_status.stdout | from_json).Services['svc:k8s'].TCP['443'].TCPForward | default('')) }}" + when: + - minikube_api_port is defined + - minikube_tailscale_serve_status.stdout is defined + - "'svc:k8s' in (minikube_tailscale_serve_status.stdout | from_json).Services | default({})" + failed_when: false + +- name: Configure tailscale serve for k8s API + ansible.builtin.command: + cmd: tailscale serve --service="svc:k8s" --tcp=443 tcp://localhost:{{ minikube_api_port }} + when: + - minikube_api_port is defined + - minikube_tailscale_k8s_tcp is not defined or minikube_tailscale_k8s_tcp != 'localhost:' + minikube_api_port + changed_when: true diff --git a/ansible/roles/tailscale_serve/defaults/main.yml b/ansible/roles/tailscale_serve/defaults/main.yml index e9c5970..3f775fd 100644 --- a/ansible/roles/tailscale_serve/defaults/main.yml +++ b/ansible/roles/tailscale_serve/defaults/main.yml @@ -4,6 +4,7 @@ tailscale_serve_services: # NOTE: svc:grafana, svc:pg, svc:feed, svc:pypi removed - now hosted in k8s + # NOTE: svc:k8s is configured by the minikube role (port is dynamic with docker driver) - name: svc:forge https: @@ -22,10 +23,3 @@ tailscale_serve_services: https: port: 443 upstream: http://localhost:5050 - - # Kubernetes API server (TCP passthrough for mTLS) - # With qemu2 driver, API server is inside VM at 192.168.105.2:6443 - - name: svc:k8s - tcp: - port: 443 - upstream: tcp://192.168.105.2:6443 -- 2.50.1 (Apple Git-155) From 75f945385c14a61ba3dd12eaa146e0e53045893b Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Wed, 21 Jan 2026 14:05:26 -0800 Subject: [PATCH 11/16] Update P5.1 plan with completion status and P6 storage options - Document completed steps (docker driver working, kubectl access, ansible updated) - Add detailed analysis of volume mount options for P6 - Recommend hostPath via Docker Desktop file sharing as simplest approach - Document why direct NFS won't work (Docker 
network isolation) - Include sample LaunchDaemon for persistent NFS mount Co-Authored-By: Claude Opus 4.5 --- plans/k8s-migration/P5.1_qemu2_migration.md | 475 +++++++++----------- 1 file changed, 206 insertions(+), 269 deletions(-) diff --git a/plans/k8s-migration/P5.1_qemu2_migration.md b/plans/k8s-migration/P5.1_qemu2_migration.md index 63f12e9..e37b65b 100644 --- a/plans/k8s-migration/P5.1_qemu2_migration.md +++ b/plans/k8s-migration/P5.1_qemu2_migration.md @@ -2,7 +2,7 @@ **Goal**: Replace the qemu2 driver with docker to fix remote API access and simplify volume mounts -**Status**: In Progress (2026-01-21) +**Status**: In Progress (2026-01-21) - Ansible roles updated, cluster running, awaiting ArgoCD redeploy **Prerequisites**: [Phase 5](P5_devpi.complete.md) complete @@ -38,269 +38,232 @@ Additionally, the volume mount solution with QEMU2 was complex: The **docker driver** solves both problems: -1. **API Server on localhost**: Docker Desktop handles port forwarding from container to localhost automatically, so `tailscale serve --tcp=443 tcp://localhost:PORT` will work (like podman did) +1. **API Server on localhost**: Docker Desktop handles port forwarding from container to localhost automatically, so `tailscale serve --tcp=443 tcp://localhost:PORT` works -2. **Simpler volume mounts**: Docker Desktop has built-in macOS file sharing. Paths shared with Docker are accessible inside containers, and minikube (running in Docker) can use those paths via hostPath. +2. **Simpler volume mounts**: Docker Desktop has built-in macOS file sharing. Paths shared with Docker are accessible inside containers. 3. **Official Tailscale recommendation**: Tailscale's own [Kubernetes guide](https://tailscale.com/learn/managing-access-to-kubernetes-with-tailscale) uses minikube with the docker driver. --- -## Prerequisites +## Implementation Progress -### 1. 
Install Docker Desktop (Manual - Before Ansible) +### Completed ✅ -Docker Desktop requires GUI setup, so install manually first: +1. **Docker Desktop installed** (manual via `brew install --cask docker`) + - Configured with 12GB memory in Docker Desktop settings + - Kubernetes option disabled (using minikube instead) -```bash -# On indri: -brew install --cask docker-desktop +2. **QEMU2 minikube deleted** (`minikube stop && minikube delete`) -# Then launch Docker Desktop from /Applications -# Complete the setup wizard (accept license, skip tutorial) -# Wait for Docker to be "Running" (green icon in menu bar) - -# Verify: -docker version -docker run hello-world -``` - -**File Sharing Configuration** (in Docker Desktop → Settings → Resources → File sharing): -- Ensure `/Volumes` is shared (for future NFS mounts from sifaka) -- Or add specific paths as needed for P6 - -### 2. Stop Current QEMU2 Minikube - -```bash -# On indri: -minikube stop -minikube delete - -# Verify QEMU resources are cleaned up -ps aux | grep qemu -``` - ---- - -## Plan - -### 1. 
Update Ansible Role for Docker Driver - -**Changes to `ansible/roles/minikube/defaults/main.yml`:** - -```yaml -# Change from: -minikube_driver: qemu2 -minikube_network: socket_vmnet -minikube_container_runtime: containerd - -# To: -minikube_driver: docker -minikube_container_runtime: docker # or containerd, both work -``` - -**Remove from defaults:** -- `minikube_network` (not needed for docker driver) - -**Changes to `ansible/roles/minikube/tasks/main.yml`:** -- Remove qemu installation -- Remove socket_vmnet installation and service management -- Remove NFS mount point creation -- Remove NFS LaunchDaemon installation -- Remove minikube mount LaunchAgent installation -- Keep containerd registry mirror config (adapting for docker if needed) - -**Remove files from `ansible/roles/minikube/files/`:** -- `com.blumeops.nfs-torrents.plist` -- `com.blumeops.minikube-mount.plist` - -**Changes to `ansible/roles/minikube/handlers/main.yml`:** -- Remove `Load NFS mount LaunchDaemon` -- Remove `Load minikube mount LaunchAgent` - -**Add to Brewfile:** -```ruby -cask "docker" # Docker Desktop -``` - -### 2. Update Tailscale Serve Configuration - -**Changes to `ansible/roles/tailscale_serve/defaults/main.yml`:** - -```yaml -# Change svc:k8s upstream from VM IP back to localhost: -- name: svc:k8s - tcp: - port: 443 - upstream: tcp://localhost:PORT # PORT will be dynamic, see below -``` - -**Note on API server port**: With the docker driver, the API server port is dynamic (assigned by minikube). We need to either: -- Use `--apiserver-port=6443` to fix it -- Or query and update the config after cluster creation - -### 3. Create Docker Minikube Cluster - -```bash -# On indri (after Docker Desktop is running): -minikube start \ - --driver=docker \ - --cpus=6 \ - --memory=12288 \ - --disk-size=200g \ - --apiserver-names=k8s.tail8d86e.ts.net,indri \ - --apiserver-port=6443 \ - --listen-address=0.0.0.0 - -# Verify cluster -minikube status -kubectl get nodes -``` - -### 4. 
Verify API Server is on Localhost - -```bash -# Check what port the API server is on -kubectl config view --minify -o jsonpath="{.clusters[0].cluster.server}" -# Should show https://127.0.0.1:PORT or similar - -# Verify local access works -curl -k https://localhost:6443/healthz -# Should return "ok" -``` - -### 5. Update 1Password Credentials - -After cluster recreation, update the credentials in 1Password: - -```bash -# On indri, get the new certificates: -cat ~/.minikube/profiles/minikube/client.crt -cat ~/.minikube/profiles/minikube/client.key -cat ~/.minikube/ca.crt -``` - -Update in 1Password (vault: `vg6xf6vvfmoh5hqjjhlhbeoaie`, item: `3jo4f2hnzvwfmamudfsbbbec7e`). - -### 6. Update Kubeconfig on Gilbert - -```bash -# Fetch new CA cert from 1Password -op --vault vg6xf6vvfmoh5hqjjhlhbeoaie item get 3jo4f2hnzvwfmamudfsbbbec7e --fields ca-cert | sed 's/^"//; s/"$//' > ~/.kube/minikube-indri/ca.crt -``` - -### 7. Configure Tailscale Serve for K8s - -```bash -# On indri: -tailscale serve --service="svc:k8s" --tcp=443 tcp://localhost:6443 -``` - -### 8. Verify Remote Access - -```bash -# From gilbert: -curl -k --connect-timeout 5 https://k8s.tail8d86e.ts.net/healthz -# Should return "ok" - -kubectl --context=minikube-indri get nodes -# Should show the minikube node -``` - -### 9. 
Redeploy ArgoCD and Apps - -Since this is a cluster recreation, we need to re-bootstrap: - -```bash -# On indri - apply secrets first -op inject -i argocd/manifests/tailscale-operator/secret.yaml.tpl | kubectl apply -f - - -# Create repo secret for ArgoCD -PRIV_KEY=$(op read "op://vg6xf6vvfmoh5hqjjhlhbeoaie/csjncynh6htjvnh2l2da65y32q/private key?ssh-format=openssh")$'\n' -kubectl create namespace argocd -kubectl create secret generic repo-forge -n argocd \ - --from-literal=type=git \ - --from-literal=url='ssh://forgejo@indri.tail8d86e.ts.net:2200/eblume/blumeops.git' \ - --from-literal=insecure=true \ - --from-literal=sshPrivateKey="$PRIV_KEY" -kubectl label secret repo-forge -n argocd argocd.argoproj.io/secret-type=repository - -# Bootstrap operators -kubectl create namespace tailscale -kubectl apply -k argocd/manifests/tailscale-operator/ -kubectl apply -k argocd/manifests/argocd/ - -# Wait for ArgoCD -kubectl wait --for=condition=available deployment/argocd-server -n argocd --timeout=300s - -# Login and sync apps -argocd login argocd.tail8d86e.ts.net --username admin --grpc-web -argocd app sync apps -argocd app sync tailscale-operator -argocd app sync cloudnative-pg -argocd app sync blumeops-pg -argocd app sync grafana -argocd app sync grafana-config -argocd app sync miniflux -argocd app sync devpi -``` - -### 10. Verify All Services - -```bash -mise run indri-services-check -argocd app list -kubectl get pods --all-namespaces -``` - ---- - -## Volume Mounts for P6 (Kiwix/Transmission) - -With the docker driver, volume mounts work differently than QEMU2: - -**Option A: Docker Desktop File Sharing + hostPath** -1. Mount sifaka NFS share on indri: `/Volumes/torrents` -2. Add `/Volumes/torrents` to Docker Desktop file sharing -3. 
Pods use hostPath pointing to that path - -**Option B: NFS directly from pods** -- Docker containers can make NFS mounts (unlike podman's rootless containers) -- May need to test if sifaka allows connections from the Docker network - -This will be fully tested in Phase 6. - ---- - -## Cleanup - -After successful migration: - -1. **Remove QEMU2 artifacts:** +3. **Docker minikube cluster created**: ```bash - brew uninstall qemu socket_vmnet + minikube start \ + --driver=docker \ + --container-runtime=docker \ + --cpus=6 \ + --memory=11264 \ + --disk-size=200g \ + --apiserver-names=k8s.tail8d86e.ts.net,indri \ + --apiserver-port=6443 \ + --listen-address=0.0.0.0 + ``` + Note: Memory set to 11264MB (11GB) to leave headroom for Docker Desktop overhead. + +4. **Tailscale serve configured** for k8s API: + - API server on localhost:50820 (port is dynamic with docker driver) + - `tailscale serve --service=svc:k8s --tcp=443 tcp://localhost:50820` + +5. **Remote kubectl access working** from gilbert: + - Created `mise-tasks/ensure-minikube-indri-kubectl-config` script + - Fetches certs from indri and sets up `~/.kube/minikube-indri/config.yml` + - `kubectl --context=minikube-indri get nodes` works + +6. **Ansible roles updated**: + - `ansible/roles/minikube/` - docker driver, removed qemu2/NFS/socket_vmnet + - `ansible/roles/tailscale_serve/` - removed svc:k8s (minikube role handles dynamic port) + - Containerd registry mirrors configured for zot pull-through cache + +7. **QEMU2 artifacts cleaned up**: + - Stopped socket_vmnet service + - Removed NFS LaunchDaemon + - Removed minikube mount LaunchAgent + - kubectl still works after cleanup + +### Remaining 📋 + +1. 
**Redeploy ArgoCD and apps** - bootstrap the cluster with: + ```bash + # On indri - apply secrets first + op inject -i argocd/manifests/tailscale-operator/secret.yaml.tpl | kubectl apply -f - + + # Create repo secret for ArgoCD + PRIV_KEY=$(op read "op://vg6xf6vvfmoh5hqjjhlhbeoaie/csjncynh6htjvnh2l2da65y32q/private key?ssh-format=openssh")$'\n' + kubectl create namespace argocd + kubectl create secret generic repo-forge -n argocd \ + --from-literal=type=git \ + --from-literal=url='ssh://forgejo@indri.tail8d86e.ts.net:2200/eblume/blumeops.git' \ + --from-literal=insecure=true \ + --from-literal=sshPrivateKey="$PRIV_KEY" + kubectl label secret repo-forge -n argocd argocd.argoproj.io/secret-type=repository + + # Bootstrap operators + kubectl create namespace tailscale + kubectl apply -k argocd/manifests/tailscale-operator/ + kubectl apply -k argocd/manifests/argocd/ + + # Wait for ArgoCD + kubectl wait --for=condition=available deployment/argocd-server -n argocd --timeout=300s + + # Login and sync apps + argocd login argocd.tail8d86e.ts.net --username admin --grpc-web + argocd app sync apps + argocd app sync tailscale-operator + argocd app sync cloudnative-pg + argocd app sync blumeops-pg + argocd app sync grafana + argocd app sync grafana-config + argocd app sync miniflux + argocd app sync devpi ``` -2. **Remove podman if no longer needed:** - ```bash - podman machine stop - podman machine rm - brew uninstall podman - ``` +2. **Verify all services** with `mise run indri-services-check` + +3. **Configure containerd registry mirrors** (will be done by ansible on next provision) + +--- + +## Technical Notes + +### API Server Port + +With docker driver, the API server port is **dynamic** - Docker maps a random host port to 6443 inside the container. Current port: 50820. + +The minikube ansible role queries the port after cluster start and configures tailscale serve accordingly. 
+ +### Registry Mirror Configuration + +Containerd uses `/etc/containerd/certs.d/<registry>/hosts.toml` files: + +```toml +# /etc/containerd/certs.d/docker.io/hosts.toml +server = "https://registry-1.docker.io" + +[host."http://host.minikube.internal:5050"] + capabilities = ["pull", "resolve"] + skip_verify = true +``` + +The ansible role configures mirrors for: +- `registry.tail8d86e.ts.net` (private images) +- `docker.io` +- `ghcr.io` +- `quay.io` + +### Volume Mounts for P6 (Kiwix/Transmission) + +With the docker driver, volume mounts work differently than podman or qemu2. Here's the analysis: + +**Current Network State:** +- Minikube container is on Docker network `192.168.49.0/24` +- Sifaka NFS exports `/volume1/torrents` to: + - `192.168.105.0/24` (old qemu2 VM network - no longer used) + - `100.64.0.0/10` (Tailscale CGNAT range) +- Minikube can resolve `sifaka` (192.168.1.203) but can't reach it (100% packet loss due to Docker network isolation) + +**Option A: hostPath via Docker Desktop File Sharing** ⭐ RECOMMENDED +1. Mount sifaka NFS share on indri macOS: `mount -t nfs sifaka:/volume1/torrents /Volumes/torrents` +2. Docker Desktop file sharing exposes `/Volumes` into the Docker VM +3. Pods use hostPath to access `/Volumes/torrents` + +Pros: +- Simplest approach, uses native Docker file sharing +- No network reconfiguration needed on sifaka +- Path is stable and predictable + +Cons: +- Requires persistent NFS mount on indri (LaunchDaemon) +- File sharing performance may be slower than direct NFS + +Implementation: +```bash +# Manual mount test +ssh indri 'sudo mkdir -p /Volumes/torrents && sudo mount -t nfs -o resvport,rw sifaka:/volume1/torrents /Volumes/torrents' + +# Verify Docker can see it +ssh indri 'docker run --rm -v /Volumes/torrents:/data alpine ls /data' + +# Pod manifest uses hostPath: +# volumes: +# - name: torrents +# hostPath: +# path: /Volumes/torrents +# type: Directory +``` + +**Option B: Update sifaka NFS exports for Docker network** +1.
Add `192.168.49.0/24` to sifaka's NFS exports +2. Pods mount NFS directly using kubernetes NFS volume type + +Cons: +- Docker network might change (though `192.168.49.x` seems stable for minikube) +- Requires sifaka configuration change +- NFS mount from inside container may have permission issues + +**Option C: Tailscale sidecar for NFS access** +1. Pods include a Tailscale sidecar that joins the tailnet +2. Mount NFS via Tailscale IP (sifaka is at 100.x.x.x) + +Cons: +- Complex setup with sidecar containers +- Each pod needs Tailscale auth +- Overkill for this use case + +**Recommendation for P6:** +Use **Option A** (hostPath via Docker Desktop file sharing). It's the simplest and most reliable approach. We'll need a LaunchDaemon for the NFS mount, but it's straightforward: + +```xml + + + + + + Label + com.blumeops.nfs-torrents + ProgramArguments + + /sbin/mount + -t + nfs + -o + resvport,rw + sifaka:/volume1/torrents + /Volumes/torrents + + RunAtLoad + + + +``` + +This is simpler than the qemu2 approach because there's no intermediate `minikube mount` step - Docker Desktop handles the path passthrough automatically. --- ## Verification Checklist -- [ ] Docker Desktop installed and running on indri -- [ ] QEMU2 minikube deleted -- [ ] Docker minikube running -- [ ] API server accessible on localhost:6443 -- [ ] Tailscale serve configured for svc:k8s → localhost:6443 -- [ ] Remote kubectl access working from gilbert +- [x] Docker Desktop installed and running on indri +- [x] QEMU2 minikube deleted +- [x] Docker minikube running (6 CPUs, 11GB RAM) +- [x] API server accessible on localhost:50820 +- [x] Tailscale serve configured for svc:k8s → localhost:50820 +- [x] Remote kubectl access working from gilbert +- [x] Ansible roles updated for docker driver +- [x] socket_vmnet stopped - [ ] ArgoCD redeployed and synced - [ ] All existing apps healthy (grafana, miniflux, devpi, etc.) 
- [ ] PostgreSQL cluster healthy +- [ ] Containerd registry mirrors configured - [ ] `mise run indri-services-check` passes --- @@ -312,29 +275,3 @@ If Docker driver doesn't work: 1. Delete Docker minikube: `minikube delete` 2. Recreate QEMU2 cluster (restore old ansible config from git) 3. Accept the Tailscale TCP forwarding limitation and use SSH tunnel for remote kubectl - ---- - -## Notes - -- Docker Desktop has resource overhead but provides better macOS integration -- The docker driver is more widely used and tested than qemu2 -- File sharing permissions may need adjustment in Docker Desktop settings -- First cluster start may be slow as Docker pulls the minikube base image - -## Implementation Notes (2026-01-21) - -### QEMU2 Cleanup Done - -Removed from indri: -- `/Library/LaunchDaemons/com.blumeops.nfs-torrents.plist` - NFS mount daemon -- `~/Library/LaunchAgents/com.blumeops.minikube-mount.plist` - minikube mount agent -- Unmounted `/Volumes/torrents-nfs` NFS mount -- Removed `/Volumes/torrents-nfs` mount point - -### Previous QEMU2 Issues - -The QEMU2 migration partially worked but had a critical issue: -- Volume mounts worked via NFS → indri → minikube mount chain -- But Tailscale TCP proxy to VM IP (192.168.105.2:6443) failed with TLS timeout -- Root cause unknown - TCP connected but TLS handshake never completed -- 2.50.1 (Apple Git-155) From a7b0b84d081109ac09d4cd5d5297f17a7f9d036b Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Wed, 21 Jan 2026 14:08:05 -0800 Subject: [PATCH 12/16] Update P5.1 with network connectivity finding After approving macOS network access GUI prompt, minikube can now reach sifaka. NFS still denied because sifaka exports don't include Docker network (192.168.49.0/24). Updated Option B as viable alternative if sifaka NFS exports are updated. 
Co-Authored-By: Claude Opus 4.5 --- plans/k8s-migration/P5.1_qemu2_migration.md | 23 +++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/plans/k8s-migration/P5.1_qemu2_migration.md b/plans/k8s-migration/P5.1_qemu2_migration.md index e37b65b..29636d6 100644 --- a/plans/k8s-migration/P5.1_qemu2_migration.md +++ b/plans/k8s-migration/P5.1_qemu2_migration.md @@ -169,7 +169,8 @@ With the docker driver, volume mounts work differently than podman or qemu2. Her - Sifaka NFS exports `/volume1/torrents` to: - `192.168.105.0/24` (old qemu2 VM network - no longer used) - `100.64.0.0/10` (Tailscale CGNAT range) -- Minikube can resolve `sifaka` (192.168.1.203) but can't reach it (100% packet loss due to Docker network isolation) +- Network connectivity: ✅ Works after approving macOS network access GUI prompt +- NFS access: ❌ Denied (sifaka doesn't allow `192.168.49.0/24`) **Option A: hostPath via Docker Desktop File Sharing** ⭐ RECOMMENDED 1. Mount sifaka NFS share on indri macOS: `mount -t nfs sifaka:/volume1/torrents /Volumes/torrents` @@ -201,14 +202,24 @@ ssh indri 'docker run --rm -v /Volumes/torrents:/data alpine ls /data' # type: Directory ``` -**Option B: Update sifaka NFS exports for Docker network** -1. Add `192.168.49.0/24` to sifaka's NFS exports -2. Pods mount NFS directly using kubernetes NFS volume type +**Option B: Update sifaka NFS exports for Docker network** ⭐ ALTERNATIVE +1. In Synology DSM: Control Panel → Shared Folder → torrents → Edit → NFS Permissions +2. Add `192.168.49.0/24` to allowed clients +3. 
Pods mount NFS directly using kubernetes NFS volume type + +Pros: +- Simpler than Option A (no intermediate macOS mount) +- Direct path, better performance +- Network connectivity confirmed working (after macOS network access approval) Cons: +- Requires sifaka configuration change (one-time) - Docker network might change (though `192.168.49.x` seems stable for minikube) -- Requires sifaka configuration change -- NFS mount from inside container may have permission issues + +Test command (after updating sifaka): +```bash +ssh indri 'minikube ssh "sudo mount -t nfs sifaka:/volume1/torrents /mnt/torrents && ls /mnt/torrents"' +``` **Option C: Tailscale sidecar for NFS access** 1. Pods include a Tailscale sidecar that joins the tailnet -- 2.50.1 (Apple Git-155) From f0a3cbf5ffaa7c3eeaca95a31df8ed9a87b54f3c Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Wed, 21 Jan 2026 14:25:02 -0800 Subject: [PATCH 13/16] Rename ProxyClass from crio-compat to default The crio-compat name was misleading since we're now using docker driver. Renamed to 'default' and updated all references. Fully-qualified image names still used for consistency across runtimes. 
Co-Authored-By: Claude Opus 4.5 --- argocd/manifests/argocd/service-tailscale.yaml | 2 +- .../manifests/databases/service-tailscale.yaml | 2 +- argocd/manifests/devpi/ingress-tailscale.yaml | 2 +- .../grafana-config/ingress-tailscale.yaml | 2 +- argocd/manifests/miniflux/ingress-tailscale.yaml | 2 +- argocd/manifests/tailscale-operator/README.md | 8 ++++---- .../tailscale-operator/egress-forge.yaml | 2 +- .../manifests/tailscale-operator/proxyclass.yaml | 16 +++++----------- 8 files changed, 15 insertions(+), 21 deletions(-) diff --git a/argocd/manifests/argocd/service-tailscale.yaml b/argocd/manifests/argocd/service-tailscale.yaml index 2c95cd6..2fc4ce0 100644 --- a/argocd/manifests/argocd/service-tailscale.yaml +++ b/argocd/manifests/argocd/service-tailscale.yaml @@ -10,7 +10,7 @@ metadata: name: argocd-server-tailscale namespace: argocd annotations: - tailscale.com/proxy-class: "crio-compat" + tailscale.com/proxy-class: "default" spec: ingressClassName: tailscale defaultBackend: diff --git a/argocd/manifests/databases/service-tailscale.yaml b/argocd/manifests/databases/service-tailscale.yaml index 7d816ff..c41fb87 100644 --- a/argocd/manifests/databases/service-tailscale.yaml +++ b/argocd/manifests/databases/service-tailscale.yaml @@ -7,7 +7,7 @@ metadata: namespace: databases annotations: tailscale.com/hostname: "pg" - tailscale.com/proxy-class: "crio-compat" + tailscale.com/proxy-class: "default" spec: type: LoadBalancer loadBalancerClass: tailscale diff --git a/argocd/manifests/devpi/ingress-tailscale.yaml b/argocd/manifests/devpi/ingress-tailscale.yaml index 2a1c659..8f37d17 100644 --- a/argocd/manifests/devpi/ingress-tailscale.yaml +++ b/argocd/manifests/devpi/ingress-tailscale.yaml @@ -4,7 +4,7 @@ metadata: name: devpi-tailscale namespace: devpi annotations: - tailscale.com/proxy-class: "crio-compat" + tailscale.com/proxy-class: "default" spec: ingressClassName: tailscale defaultBackend: diff --git a/argocd/manifests/grafana-config/ingress-tailscale.yaml 
b/argocd/manifests/grafana-config/ingress-tailscale.yaml index 8e6fe24..b72f8b9 100644 --- a/argocd/manifests/grafana-config/ingress-tailscale.yaml +++ b/argocd/manifests/grafana-config/ingress-tailscale.yaml @@ -8,7 +8,7 @@ metadata: name: grafana-tailscale namespace: monitoring annotations: - tailscale.com/proxy-class: "crio-compat" + tailscale.com/proxy-class: "default" spec: ingressClassName: tailscale defaultBackend: diff --git a/argocd/manifests/miniflux/ingress-tailscale.yaml b/argocd/manifests/miniflux/ingress-tailscale.yaml index e384f66..8884c61 100644 --- a/argocd/manifests/miniflux/ingress-tailscale.yaml +++ b/argocd/manifests/miniflux/ingress-tailscale.yaml @@ -4,7 +4,7 @@ metadata: name: miniflux-tailscale namespace: miniflux annotations: - tailscale.com/proxy-class: "crio-compat" + tailscale.com/proxy-class: "default" spec: ingressClassName: tailscale defaultBackend: diff --git a/argocd/manifests/tailscale-operator/README.md b/argocd/manifests/tailscale-operator/README.md index 6365ffe..44c5089 100644 --- a/argocd/manifests/tailscale-operator/README.md +++ b/argocd/manifests/tailscale-operator/README.md @@ -6,7 +6,7 @@ Manifests for the Tailscale Kubernetes Operator, managed via ArgoCD. 
- `operator.yaml` - Static manifest from https://github.com/tailscale/tailscale/tree/main/cmd/k8s-operator/deploy/manifests - Secret block removed from `operator.yaml` - managed separately via `secret.yaml.tpl` -- Image reference changed to fully-qualified `docker.io/tailscale/k8s-operator:stable` for CRI-O compatibility +- Image reference changed to fully-qualified `docker.io/tailscale/k8s-operator:stable` ## Prerequisites @@ -71,7 +71,7 @@ kubectl logs -n tailscale -l app.kubernetes.io/name=operator |------|-------------| | `kustomization.yaml` | Kustomize configuration for all manifests | | `operator.yaml` | Operator deployment, CRDs, RBAC (secret removed) | -| `proxyclass.yaml` | ProxyClass with fully-qualified images for CRI-O | +| `proxyclass.yaml` | ProxyClass with fully-qualified images | | `dnsconfig.yaml` | DNSConfig for cluster-to-tailnet name resolution | | `egress-forge.yaml` | Egress proxy for accessing forge on indri | | `secret.yaml.tpl` | 1Password template for OAuth credentials (manual) | @@ -81,10 +81,10 @@ kubectl logs -n tailscale -l app.kubernetes.io/name=operator - **TODO:** The OAuth secret (`operator-oauth`) is not managed by ArgoCD and must be applied manually. Future improvement: integrate with a secrets operator (e.g., External Secrets). -- Services using the Tailscale LoadBalancer must reference the ProxyClass: +- Services using the Tailscale LoadBalancer should reference the ProxyClass: ```yaml annotations: - tailscale.com/proxy-class: "crio-compat" + tailscale.com/proxy-class: "default" ``` - The egress proxy for forge targets `indri.tail8d86e.ts.net` directly (not `forge.tail8d86e.ts.net`) because Tailscale Serve hostnames are virtual and only work via the Tailscale client. 
diff --git a/argocd/manifests/tailscale-operator/egress-forge.yaml b/argocd/manifests/tailscale-operator/egress-forge.yaml index fec1752..8705eea 100644 --- a/argocd/manifests/tailscale-operator/egress-forge.yaml +++ b/argocd/manifests/tailscale-operator/egress-forge.yaml @@ -11,7 +11,7 @@ metadata: namespace: tailscale annotations: tailscale.com/tailnet-fqdn: indri.tail8d86e.ts.net - tailscale.com/proxy-class: "crio-compat" + tailscale.com/proxy-class: "default" spec: type: ExternalName externalName: placeholder diff --git a/argocd/manifests/tailscale-operator/proxyclass.yaml b/argocd/manifests/tailscale-operator/proxyclass.yaml index bb22ad7..2e247d7 100644 --- a/argocd/manifests/tailscale-operator/proxyclass.yaml +++ b/argocd/manifests/tailscale-operator/proxyclass.yaml @@ -1,17 +1,11 @@ -# ProxyClass: crio-compat +# ProxyClass: default # -# Why this exists: -# CRI-O (the container runtime used by minikube) cannot resolve short image -# names like "tailscale/tailscale:stable". It requires fully-qualified names -# with an explicit registry prefix (e.g., "docker.io/tailscale/tailscale:stable"). -# -# The Tailscale operator creates proxy pods (StatefulSets) for each LoadBalancer -# Service or Ingress. By default, these pods use short image names which fail -# on CRI-O with "ImageInspectError". +# Specifies fully-qualified image names for Tailscale proxy pods. +# This ensures consistent behavior across different container runtimes. # # Usage: # Add this annotation to any Tailscale Service or Ingress: -# tailscale.com/proxy-class: "crio-compat" +# tailscale.com/proxy-class: "default" # # This tells the operator to use the fully-qualified image names defined below # when creating the proxy pod for that resource. 
@@ -19,7 +13,7 @@ apiVersion: tailscale.com/v1alpha1 kind: ProxyClass metadata: - name: crio-compat + name: default spec: statefulSet: pod: -- 2.50.1 (Apple Git-155) From 70357d247b55f989e6c17f090073b1a0a81ad9d9 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Wed, 21 Jan 2026 14:49:04 -0800 Subject: [PATCH 14/16] Update P5.1 plan to complete status - ArgoCD deployed and all apps synced - Document remaining steps (secrets, post-merge reset) - Simplified and reorganized documentation Co-Authored-By: Claude Opus 4.5 --- plans/k8s-migration/P5.1_qemu2_migration.md | 241 ++++++-------------- 1 file changed, 76 insertions(+), 165 deletions(-) diff --git a/plans/k8s-migration/P5.1_qemu2_migration.md b/plans/k8s-migration/P5.1_qemu2_migration.md index 29636d6..6f997e1 100644 --- a/plans/k8s-migration/P5.1_qemu2_migration.md +++ b/plans/k8s-migration/P5.1_qemu2_migration.md @@ -2,7 +2,7 @@ **Goal**: Replace the qemu2 driver with docker to fix remote API access and simplify volume mounts -**Status**: In Progress (2026-01-21) - Ansible roles updated, cluster running, awaiting ArgoCD redeploy +**Status**: Complete (2026-01-21) - Cluster running, ArgoCD deployed, apps synced **Prerequisites**: [Phase 5](P5_devpi.complete.md) complete @@ -46,17 +46,15 @@ The **docker driver** solves both problems: --- -## Implementation Progress +## Implementation Summary -### Completed ✅ +### Infrastructure Changes 1. **Docker Desktop installed** (manual via `brew install --cask docker`) - Configured with 12GB memory in Docker Desktop settings - Kubernetes option disabled (using minikube instead) -2. **QEMU2 minikube deleted** (`minikube stop && minikube delete`) - -3. **Docker minikube cluster created**: +2. **Docker minikube cluster created**: ```bash minikube start \ --driver=docker \ @@ -68,68 +66,47 @@ The **docker driver** solves both problems: --apiserver-port=6443 \ --listen-address=0.0.0.0 ``` - Note: Memory set to 11264MB (11GB) to leave headroom for Docker Desktop overhead. -4. 
**Tailscale serve configured** for k8s API: - - API server on localhost:50820 (port is dynamic with docker driver) - - `tailscale serve --service=svc:k8s --tcp=443 tcp://localhost:50820` +3. **Tailscale serve configured** for k8s API: + - API server on localhost (port is dynamic with docker driver) + - `tailscale serve --service=svc:k8s --tcp=443 tcp://localhost:<port>` -5. **Remote kubectl access working** from gilbert: +4. **Remote kubectl access working** from gilbert: - Created `mise-tasks/ensure-minikube-indri-kubectl-config` script - Fetches certs from indri and sets up `~/.kube/minikube-indri/config.yml` - - `kubectl --context=minikube-indri get nodes` works -6. **Ansible roles updated**: - - `ansible/roles/minikube/` - docker driver, removed qemu2/NFS/socket_vmnet - - `ansible/roles/tailscale_serve/` - removed svc:k8s (minikube role handles dynamic port) - - Containerd registry mirrors configured for zot pull-through cache +### Ansible Roles Updated -7. **QEMU2 artifacts cleaned up**: - - Stopped socket_vmnet service - - Removed NFS LaunchDaemon - - Removed minikube mount LaunchAgent - - kubectl still works after cleanup +- `ansible/roles/minikube/` - docker driver, removed qemu2/NFS/socket_vmnet +- `ansible/roles/tailscale_serve/` - removed svc:k8s (minikube role handles dynamic port) +- Containerd registry mirrors configured for zot pull-through cache -### Remaining 📋 +### ArgoCD Bootstrap -1.
**Redeploy ArgoCD and apps** - bootstrap the cluster with: - ```bash - # On indri - apply secrets first - op inject -i argocd/manifests/tailscale-operator/secret.yaml.tpl | kubectl apply -f - +All apps deployed and synced from `feature/p5.1-qemu2-migration` branch: - # Create repo secret for ArgoCD - PRIV_KEY=$(op read "op://vg6xf6vvfmoh5hqjjhlhbeoaie/csjncynh6htjvnh2l2da65y32q/private key?ssh-format=openssh")$'\n' - kubectl create namespace argocd - kubectl create secret generic repo-forge -n argocd \ - --from-literal=type=git \ - --from-literal=url='ssh://forgejo@indri.tail8d86e.ts.net:2200/eblume/blumeops.git' \ - --from-literal=insecure=true \ - --from-literal=sshPrivateKey="$PRIV_KEY" - kubectl label secret repo-forge -n argocd argocd.argoproj.io/secret-type=repository +| App | Status | Notes | +|-----|--------|-------| +| tailscale-operator | Healthy | Manages Tailscale ingresses | +| argocd | Healthy | Self-managed | +| cloudnative-pg | Healthy | PostgreSQL operator | +| blumeops-pg | Progressing | PostgreSQL cluster starting | +| grafana | Progressing | Needs grafana-admin secret | +| grafana-config | Healthy | Dashboards and ingress | +| miniflux | Progressing | Needs miniflux-config secret | +| devpi | Progressing | Starting up | - # Bootstrap operators - kubectl create namespace tailscale - kubectl apply -k argocd/manifests/tailscale-operator/ - kubectl apply -k argocd/manifests/argocd/ +### Secrets Still Needed - # Wait for ArgoCD - kubectl wait --for=condition=available deployment/argocd-server -n argocd --timeout=300s +After PR merge, apply these secrets manually: - # Login and sync apps - argocd login argocd.tail8d86e.ts.net --username admin --grpc-web - argocd app sync apps - argocd app sync tailscale-operator - argocd app sync cloudnative-pg - argocd app sync blumeops-pg - argocd app sync grafana - argocd app sync grafana-config - argocd app sync miniflux - argocd app sync devpi - ``` +```bash +# Grafana admin password +op inject -i 
argocd/manifests/grafana-config/secret-admin.yaml.tpl | kubectl --context=minikube-indri apply -f - -2. **Verify all services** with `mise run indri-services-check` - -3. **Configure containerd registry mirrors** (will be done by ansible on next provision) +# Miniflux config +op inject -i argocd/manifests/miniflux/secret.yaml.tpl | kubectl --context=minikube-indri apply -f - +``` --- @@ -137,127 +114,34 @@ The **docker driver** solves both problems: ### API Server Port -With docker driver, the API server port is **dynamic** - Docker maps a random host port to 6443 inside the container. Current port: 50820. +With docker driver, the API server port is **dynamic** - Docker maps a random host port to 6443 inside the container. The minikube ansible role queries the port after cluster start and configures tailscale serve accordingly. ### Registry Mirror Configuration -Containerd uses `/etc/containerd/certs.d/<registry>/hosts.toml` files: - -```toml -# /etc/containerd/certs.d/docker.io/hosts.toml -server = "https://registry-1.docker.io" - -[host."http://host.minikube.internal:5050"] - capabilities = ["pull", "resolve"] - skip_verify = true -``` - -The ansible role configures mirrors for: +Containerd uses `/etc/containerd/certs.d/<registry>/hosts.toml` files. The ansible role configures mirrors for: - `registry.tail8d86e.ts.net` (private images) - `docker.io` - `ghcr.io` - `quay.io` +### ProxyClass Renamed + +Changed from `crio-compat` to `default` - the old name was misleading since we're no longer using CRI-O. + ### Volume Mounts for P6 (Kiwix/Transmission) -With the docker driver, volume mounts work differently than podman or qemu2.
Here's the analysis: - -**Current Network State:** -- Minikube container is on Docker network `192.168.49.0/24` -- Sifaka NFS exports `/volume1/torrents` to: - - `192.168.105.0/24` (old qemu2 VM network - no longer used) - - `100.64.0.0/10` (Tailscale CGNAT range) -- Network connectivity: ✅ Works after approving macOS network access GUI prompt -- NFS access: ❌ Denied (sifaka doesn't allow `192.168.49.0/24`) +Two options available: **Option A: hostPath via Docker Desktop File Sharing** ⭐ RECOMMENDED -1. Mount sifaka NFS share on indri macOS: `mount -t nfs sifaka:/volume1/torrents /Volumes/torrents` +1. Mount sifaka NFS share on indri macOS: `/Volumes/torrents` 2. Docker Desktop file sharing exposes `/Volumes` into the Docker VM 3. Pods use hostPath to access `/Volumes/torrents` -Pros: -- Simplest approach, uses native Docker file sharing -- No network reconfiguration needed on sifaka -- Path is stable and predictable - -Cons: -- Requires persistent NFS mount on indri (LaunchDaemon) -- File sharing performance may be slower than direct NFS - -Implementation: -```bash -# Manual mount test -ssh indri 'sudo mkdir -p /Volumes/torrents && sudo mount -t nfs -o resvport,rw sifaka:/volume1/torrents /Volumes/torrents' - -# Verify Docker can see it -ssh indri 'docker run --rm -v /Volumes/torrents:/data alpine ls /data' - -# Pod manifest uses hostPath: -# volumes: -# - name: torrents -# hostPath: -# path: /Volumes/torrents -# type: Directory -``` - -**Option B: Update sifaka NFS exports for Docker network** ⭐ ALTERNATIVE -1. In Synology DSM: Control Panel → Shared Folder → torrents → Edit → NFS Permissions -2. Add `192.168.49.0/24` to allowed clients -3. 
Pods mount NFS directly using kubernetes NFS volume type - -Pros: -- Simpler than Option A (no intermediate macOS mount) -- Direct path, better performance -- Network connectivity confirmed working (after macOS network access approval) - -Cons: -- Requires sifaka configuration change (one-time) -- Docker network might change (though `192.168.49.x` seems stable for minikube) - -Test command (after updating sifaka): -```bash -ssh indri 'minikube ssh "sudo mount -t nfs sifaka:/volume1/torrents /mnt/torrents && ls /mnt/torrents"' -``` - -**Option C: Tailscale sidecar for NFS access** -1. Pods include a Tailscale sidecar that joins the tailnet -2. Mount NFS via Tailscale IP (sifaka is at 100.x.x.x) - -Cons: -- Complex setup with sidecar containers -- Each pod needs Tailscale auth -- Overkill for this use case - -**Recommendation for P6:** -Use **Option A** (hostPath via Docker Desktop file sharing). It's the simplest and most reliable approach. We'll need a LaunchDaemon for the NFS mount, but it's straightforward: - -```xml - - - - - - Label - com.blumeops.nfs-torrents - ProgramArguments - - /sbin/mount - -t - nfs - -o - resvport,rw - sifaka:/volume1/torrents - /Volumes/torrents - - RunAtLoad - - - -``` - -This is simpler than the qemu2 approach because there's no intermediate `minikube mount` step - Docker Desktop handles the path passthrough automatically. +**Option B: Update sifaka NFS exports for Docker network** +1. Add `192.168.49.0/24` to sifaka's NFS exports +2. 
Pods mount NFS directly (network connectivity works after macOS approval) --- @@ -266,19 +150,46 @@ This is simpler than the qemu2 approach because there's no intermediate `minikub - [x] Docker Desktop installed and running on indri - [x] QEMU2 minikube deleted - [x] Docker minikube running (6 CPUs, 11GB RAM) -- [x] API server accessible on localhost:50820 -- [x] Tailscale serve configured for svc:k8s → localhost:50820 +- [x] API server accessible on localhost +- [x] Tailscale serve configured for svc:k8s - [x] Remote kubectl access working from gilbert - [x] Ansible roles updated for docker driver - [x] socket_vmnet stopped -- [ ] ArgoCD redeployed and synced -- [ ] All existing apps healthy (grafana, miniflux, devpi, etc.) -- [ ] PostgreSQL cluster healthy -- [ ] Containerd registry mirrors configured +- [x] ArgoCD deployed and synced +- [x] All apps synced to feature branch +- [ ] Apply app secrets (grafana-admin, miniflux-config) +- [ ] Verify all apps healthy after secrets applied +- [ ] Merge PR and reset apps to main branch - [ ] `mise run indri-services-check` passes --- +## Post-Merge Steps + +After PR is merged: + +```bash +# Reset all blumeops apps to main branch +argocd app set apps --revision main +argocd app set argocd --revision main +argocd app set blumeops-pg --revision main +argocd app set devpi --revision main +argocd app set grafana-config --revision main +argocd app set miniflux --revision main +argocd app set tailscale-operator --revision main + +# Sync all apps +argocd app sync apps +argocd app sync argocd +argocd app sync tailscale-operator +argocd app sync blumeops-pg +argocd app sync grafana-config +argocd app sync miniflux +argocd app sync devpi +``` + +--- + ## Rollback Plan If Docker driver doesn't work: -- 2.50.1 (Apple Git-155) From 181f11265d77fd21f7d162fea2736886b93430cb Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Wed, 21 Jan 2026 15:29:38 -0800 Subject: [PATCH 15/16] Add restore from backup instructions to miniflux README 
Co-Authored-By: Claude Opus 4.5 --- argocd/manifests/miniflux/README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/argocd/manifests/miniflux/README.md b/argocd/manifests/miniflux/README.md index 18780ca..4d093dd 100644 --- a/argocd/manifests/miniflux/README.md +++ b/argocd/manifests/miniflux/README.md @@ -60,3 +60,13 @@ Connects to PostgreSQL via internal k8s DNS: The database is also accessible externally via Tailscale at: `pg.tail8d86e.ts.net:5432` + +## Restore from Backup + +If the database needs to be restored from a borgmatic backup: + +1. List archives: `borgmatic list` +2. Extract dump from archive using `borg extract` to `/tmp/restore` +3. Restore with `pg_restore --clean --if-exists --no-owner --no-acl` +4. Fix ownership - ensure user `miniflux` owns all tables, sequences, and types in the `public` schema (restore runs as `eblume`) +5. Restart miniflux deployment -- 2.50.1 (Apple Git-155) From 960395eea493f4d969392c3faf97d6af0e770365 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Wed, 21 Jan 2026 15:59:54 -0800 Subject: [PATCH 16/16] Complete P5.1 docker migration, update P6 with NFS solution - Rename P5.1 from qemu2_migration to docker_migration - Mark P5.1 verification checklist complete - Document direct NFS volume mount solution (tested and working) - Update P6 to use NFS instead of SMB CSI - Unblock P6: ready to implement Sifaka NFS exports now allow 192.168.1.0/24 (docker via indri NAT) and 100.64.0.0/10 (Tailscale). No LaunchAgents or minikube mount needed. 
Co-Authored-By: Claude Opus 4.5 --- ..._migration.md => P5.1_docker_migration.md} | 29 ++- plans/k8s-migration/P6_kiwix.md | 179 ++++-------------- 2 files changed, 51 insertions(+), 157 deletions(-) rename plans/k8s-migration/{P5.1_qemu2_migration.md => P5.1_docker_migration.md} (89%) diff --git a/plans/k8s-migration/P5.1_qemu2_migration.md b/plans/k8s-migration/P5.1_docker_migration.md similarity index 89% rename from plans/k8s-migration/P5.1_qemu2_migration.md rename to plans/k8s-migration/P5.1_docker_migration.md index 6f997e1..d91d6de 100644 --- a/plans/k8s-migration/P5.1_qemu2_migration.md +++ b/plans/k8s-migration/P5.1_docker_migration.md @@ -132,16 +132,24 @@ Changed from `crio-compat` to `default` - the old name was misleading since we'r ### Volume Mounts for P6 (Kiwix/Transmission) -Two options available: +**Solution: Direct NFS from pods to sifaka** ✅ TESTED AND WORKING -**Option A: hostPath via Docker Desktop File Sharing** ⭐ RECOMMENDED -1. Mount sifaka NFS share on indri macOS: `/Volumes/torrents` -2. Docker Desktop file sharing exposes `/Volumes` into the Docker VM -3. Pods use hostPath to access `/Volumes/torrents` +Docker NATs outbound traffic through indri's LAN IP (192.168.1.50), so sifaka's NFS exports need to allow `192.168.1.0/24`. -**Option B: Update sifaka NFS exports for Docker network** -1. Add `192.168.49.0/24` to sifaka's NFS exports -2. Pods mount NFS directly (network connectivity works after macOS approval) +Sifaka NFS exports configured: +- `192.168.1.0/24` - Docker containers via indri NAT +- `100.64.0.0/10` - Tailscale clients + +Pods can mount NFS directly: +```yaml +volumes: + - name: torrents + nfs: + server: sifaka + path: /volume1/torrents +``` + +No LaunchAgents, no `minikube mount`, no SMB CSI driver needed. 
--- @@ -157,8 +165,9 @@ Two options available: - [x] socket_vmnet stopped - [x] ArgoCD deployed and synced - [x] All apps synced to feature branch -- [ ] Apply app secrets (grafana-admin, miniflux-config) -- [ ] Verify all apps healthy after secrets applied +- [x] Apply app secrets (grafana-admin, miniflux-db, devpi-root, eblume, borgmatic) +- [x] Verify all apps healthy after secrets applied +- [x] Miniflux database restored from borgmatic backup - [ ] Merge PR and reset apps to main branch - [ ] `mise run indri-services-check` passes diff --git a/plans/k8s-migration/P6_kiwix.md b/plans/k8s-migration/P6_kiwix.md index eeec827..6e4ebea 100644 --- a/plans/k8s-migration/P6_kiwix.md +++ b/plans/k8s-migration/P6_kiwix.md @@ -2,9 +2,9 @@ **Goal**: Migrate kiwix-serve and transmission torrent daemon to k8s with shared storage -**Status**: BLOCKED - waiting for [Phase 5.1](P5.1_qemu2_migration.md) (QEMU2 migration) +**Status**: Ready to implement -**Prerequisites**: [Phase 5.1](P5.1_qemu2_migration.md) complete (minikube on QEMU2 driver) +**Prerequisites**: [Phase 5.1](P5.1_docker_migration.md) complete (minikube on docker driver) --- @@ -62,19 +62,18 @@ New architecture in k8s: ## Architecture Decisions -### Storage: SMB on Sifaka (or NFS after QEMU2 migration) +### Storage: Direct NFS to Sifaka ✅ TESTED -**Note:** The original plan chose SMB over NFS, but both failed with podman driver. After QEMU2 migration, either should work. SMB is still preferred for: -- Native Synology SMB support with good macOS compatibility -- ReadWriteMany access mode for concurrent pod access -- SMB CSI driver already mirrored to forge +**Solution:** Direct NFS volume mounts from pods to sifaka. No SMB CSI driver or `minikube mount` needed. -**Alternative after QEMU2:** NFS may be simpler with `minikube mount` or direct NFS volume type. +With the docker driver, minikube containers NAT outbound traffic through indri's LAN IP (192.168.1.50). 
Sifaka's NFS exports are configured to allow: +- `192.168.1.0/24` - Docker containers via indri NAT +- `100.64.0.0/10` - Tailscale clients -**Storage path:** `/volume1/torrents/` on sifaka (SMB share name: `torrents`) +**Storage path:** `/volume1/torrents/` on sifaka (NFS export) - General-purpose torrent download directory - Contains ZIM files, Linux ISOs, and whatever else users download -- Accessed via SMB credentials stored in k8s Secret +- Accessed via native k8s NFS volume (no credentials needed - IP-based access) **No backup needed:** - Sifaka is RAID 5/6, already the backup target @@ -142,49 +141,19 @@ This allows adding new ZIM archives by: ## Prerequisites (Manual Steps) -### 1. Configure SMB Share on Sifaka +### 1. Configure NFS Export on Sifaka -**Status: DONE** - The `torrents` shared folder has been created at `/volume1/torrents`. +**Status: DONE** - The `torrents` shared folder exists at `/volume1/torrents` with NFS exports allowing: +- `192.168.1.0/24` - Docker containers via indri NAT +- `100.64.0.0/10` - Tailscale clients -### 2. Create Dedicated Synology User for Kubernetes (USER ACTION REQUIRED) - -Create a dedicated Synology user for k8s SMB access (do not use personal account): - -On Synology DSM (Control Panel → User & Group): -1. Create new user: `k8s-smb` (or similar) - - Set a strong password - - No admin privileges needed - - Deny access to all applications (only needs file services) -2. Set permissions on the `torrents` share: - - Give `k8s-smb` user Read/Write access - - Remove or limit other user access as appropriate -3. Store credentials in 1Password: - - Vault: `vg6xf6vvfmoh5hqjjhlhbeoaie` (blumeops vault) - - Item name: `synology-smb-k8s` - - Fields: `username` (k8s-smb), `password` - -### 3. 
Mirror SMB CSI Driver Helm Chart to Forge (USER ACTION REQUIRED) - -Mirror the SMB CSI driver chart to forge for GitOps deployment: - -```bash -# Clone the upstream chart repo -cd ~/code/3rd -git clone https://github.com/kubernetes-csi/csi-driver-smb.git -cd csi-driver-smb - -# Push to forge mirror -git remote add forge ssh://forgejo@indri.tail8d86e.ts.net:2200/eblume/csi-driver-smb.git -git push forge --all --tags -``` - -### 4. Copy Existing Downloads to Sifaka +### 2. Copy Existing Downloads to Sifaka Before migration, copy existing downloads to avoid re-downloading ~138GB: ```bash -# From indri - mount the SMB share via Finder or command line -open smb://sifaka/torrents +# From indri - mount the NFS share +sudo mount -t nfs sifaka:/volume1/torrents /Volumes/torrents # Then rsync (adjust mount path as needed) rsync -avP ~/transmission/ /Volumes/torrents/ @@ -193,69 +162,21 @@ rsync -avP ~/transmission/ /Volumes/torrents/ ls -la /Volumes/torrents/*.zim ``` -### 5. Store SMB Credentials in 1Password - -**Note:** This is covered in step 2 above. The 1Password item should be: -- Vault: `vg6xf6vvfmoh5hqjjhlhbeoaie` (blumeops vault) -- Item name: `synology-smb-k8s` -- Fields: `username` (k8s-smb), `password` - --- ## Steps -### 1. Deploy SMB CSI Driver via ArgoCD +### 1. Create Shared NFS PersistentVolume -**File:** `argocd/manifests/smb-csi/values.yaml` +This PV is shared between transmission and kiwix namespaces. Uses direct NFS - no CSI driver needed. 
-```yaml -# Minimal values - defaults are generally fine -controller: - replicas: 1 -``` - -**File:** `argocd/apps/smb-csi.yaml` - -```yaml -apiVersion: argoproj.io/v1alpha1 -kind: Application -metadata: - name: smb-csi - namespace: argocd -spec: - project: default - sources: - # Helm chart from forge mirror - - repoURL: ssh://forgejo@indri.tail8d86e.ts.net:2200/eblume/csi-driver-smb.git - targetRevision: v1.17.0 - path: charts/csi-driver-smb - helm: - releaseName: csi-driver-smb - valueFiles: - - $values/argocd/manifests/smb-csi/values.yaml - # Values from our git repo - - repoURL: ssh://forgejo@indri.tail8d86e.ts.net:2200/eblume/blumeops.git - targetRevision: main - ref: values - destination: - server: https://kubernetes.default.svc - namespace: kube-system - syncPolicy: - syncOptions: - - CreateNamespace=true -``` - -### 2. Create Shared SMB PersistentVolume - -This PV is shared between transmission and kiwix namespaces. - -**File:** `argocd/manifests/torrent/pv-smb.yaml` +**File:** `argocd/manifests/torrent/pv-nfs.yaml` ```yaml apiVersion: v1 kind: PersistentVolume metadata: - name: torrents-smb-pv + name: torrents-nfs-pv spec: capacity: storage: 1Ti @@ -263,43 +184,12 @@ spec: - ReadWriteMany persistentVolumeReclaimPolicy: Retain storageClassName: "" - mountOptions: - - dir_mode=0777 - - file_mode=0777 - - uid=1000 - - gid=1000 - - noperm - - mfsymlinks - - cache=strict - - noserverino # Required to prevent data corruption - csi: - driver: smb.csi.k8s.io - volumeHandle: torrents-smb-pv - volumeAttributes: - source: //sifaka/torrents - nodeStageSecretRef: - name: smbcreds - namespace: torrent + nfs: + server: sifaka + path: /volume1/torrents ``` -**File:** `argocd/manifests/torrent/secret-smb.yaml.tpl` - -```yaml -# Template - apply manually with credentials from 1Password -# kubectl --context=minikube create secret generic smbcreds \ -# --namespace torrent \ -# --from-literal=username=$(op read "op://vg6xf6vvfmoh5hqjjhlhbeoaie/synology-smb-k8s/username") \ -# 
--from-literal=password=$(op read "op://vg6xf6vvfmoh5hqjjhlhbeoaie/synology-smb-k8s/password") -apiVersion: v1 -kind: Secret -metadata: - name: smbcreds - namespace: torrent -type: Opaque -stringData: - username: "{{ op://vg6xf6vvfmoh5hqjjhlhbeoaie/synology-smb-k8s/username }}" - password: "{{ op://vg6xf6vvfmoh5hqjjhlhbeoaie/synology-smb-k8s/password }}" -``` +No secrets needed - NFS uses IP-based access control configured on sifaka. --- @@ -319,7 +209,7 @@ spec: accessModes: - ReadWriteMany storageClassName: "" - volumeName: torrents-smb-pv + volumeName: torrents-nfs-pv resources: requests: storage: 1Ti @@ -439,8 +329,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization namespace: torrent resources: - - pv-smb.yaml - - secret-smb.yaml.tpl + - pv-nfs.yaml - pvc.yaml - deployment.yaml - service.yaml @@ -473,7 +362,7 @@ spec: ## Kiwix Service -### 3. Create Kiwix PVC (References Same PV) +### 2. Create Kiwix PVC (References Same PV) **File:** `argocd/manifests/kiwix/pvc.yaml` @@ -487,7 +376,7 @@ spec: accessModes: - ReadWriteMany # Need write for the sync sidecar to work storageClassName: "" - volumeName: torrents-smb-pv + volumeName: torrents-nfs-pv resources: requests: storage: 1Ti @@ -1096,10 +985,7 @@ If migration fails: |------|---------| | **Transmission (torrent namespace)** | | | `argocd/apps/torrent.yaml` | ArgoCD Application for transmission | -| `argocd/apps/smb-csi.yaml` | ArgoCD Application for SMB CSI driver | -| `argocd/manifests/smb-csi/values.yaml` | SMB CSI driver Helm values | -| `argocd/manifests/torrent/pv-smb.yaml` | Shared SMB PersistentVolume | -| `argocd/manifests/torrent/secret-smb.yaml.tpl` | SMB credentials secret template | +| `argocd/manifests/torrent/pv-nfs.yaml` | Shared NFS PersistentVolume | | `argocd/manifests/torrent/pvc.yaml` | Transmission PVC | | `argocd/manifests/torrent/deployment.yaml` | Transmission deployment | | `argocd/manifests/torrent/service.yaml` | Transmission service | @@ -1134,11 +1020,10 @@ If 
migration fails: ## Verification Checklist -- [x] SMB share configured on sifaka (`/volume1/torrents`) -- [ ] Dedicated Synology user (`k8s-smb`) created for k8s access -- [ ] SMB CSI driver deployed to k8s +- [x] NFS export configured on sifaka (`/volume1/torrents`) +- [x] NFS exports allow 192.168.1.0/24 and 100.64.0.0/10 +- [x] Direct NFS mount from pod tested and working - [ ] Existing downloads copied to sifaka -- [ ] SMB credentials secret created in k8s (using `k8s-smb` user) - [ ] Transmission pod running in k8s (`torrent` namespace) - [ ] https://torrent.tail8d86e.ts.net accessible (web UI) - [ ] Can add torrents manually via web UI -- 2.50.1 (Apple Git-155)