K8s Migration Phase 1: Infrastructure Setup (#29)

## Summary
- Split k8s migration plan into phases folder for easier navigation
- Added `tag:k8s` to Pulumi ACLs for Kubernetes workloads
- Phase 1 work in progress

## Phase 1 Goals
- Tailscale Kubernetes Operator
- CloudNativePG Operator
- PostgreSQL cluster for future app migrations

## Deployment and Testing
- [ ] Review Phase 1 plan
- [ ] `mise run tailnet-preview` to verify ACL changes
- [ ] `mise run tailnet-up` to apply ACL changes
- [ ] Create Tailscale OAuth client (manual)
- [ ] Deploy operators and PostgreSQL cluster

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Reviewed-on: https://forge.tail8d86e.ts.net/eblume/blumeops/pulls/29
This commit is contained in:
Erich Blume 2026-01-19 09:49:52 -08:00
commit a8f4d00294
42 changed files with 7501 additions and 547 deletions

View file

@ -27,3 +27,5 @@ rules:
ignore: ignore:
- .venv/ - .venv/
- pulumi/.venv/ - pulumi/.venv/
# Third-party k8s manifest with non-standard formatting
- argocd/manifests/tailscale-operator/operator.yaml

View file

@ -1,4 +1,5 @@
# CLI tools for blumeops management # CLI tools for blumeops management
brew "argocd" # ArgoCD CLI for GitOps management
brew "bat" # Syntax-highlighted file concatenation brew "bat" # Syntax-highlighted file concatenation
brew "tea" # Gitea/Forgejo CLI for forge.tail8d86e.ts.net brew "tea" # Gitea/Forgejo CLI for forge.tail8d86e.ts.net
brew "podman" # Container CLI (uses VM on macOS, for building/pushing images) brew "podman" # Container CLI (uses VM on macOS, for building/pushing images)

View file

@ -0,0 +1,27 @@
# Registry mirrors: send image pulls through the zot pull-through cache on indri.
# The cache is addressed as host.containers.internal, which is stable across restarts.
# This file is applied by the ansible minikube role.

# Docker Hub
[[registry]]
location = "docker.io"
prefix = "docker.io"

  [[registry.mirror]]
  insecure = true
  location = "host.containers.internal:5050/docker.io"

# GitHub Container Registry
[[registry]]
location = "ghcr.io"
prefix = "ghcr.io"

  [[registry.mirror]]
  insecure = true
  location = "host.containers.internal:5050/ghcr.io"

# Quay
[[registry]]
location = "quay.io"
prefix = "quay.io"

  [[registry.mirror]]
  insecure = true
  location = "host.containers.internal:5050/quay.io"

View file

@ -7,3 +7,8 @@
minikube stop 2>/dev/null || true minikube stop 2>/dev/null || true
minikube start minikube start
changed_when: true changed_when: true
# Handler: restarts the crio systemd unit inside the minikube node over
# `minikube ssh`. Notified by tasks that change CRI-O registry configuration.
- name: Restart CRI-O in minikube
  changed_when: true
  ansible.builtin.command:
    cmd: minikube ssh "sudo systemctl restart crio"

View file

@ -56,3 +56,42 @@
ansible.builtin.debug: ansible.builtin.debug:
msg: "WARNING: minikube may not have started properly. Run 'minikube start' manually on indri if needed. Status: {{ minikube_final_status.stdout | default('unknown') }}" msg: "WARNING: minikube may not have started properly. Run 'minikube start' manually on indri if needed. Status: {{ minikube_final_status.stdout | default('unknown') }}"
when: minikube_final_status.rc != 0 or 'Running' not in minikube_final_status.stdout when: minikube_final_status.rc != 0 or 'Running' not in minikube_final_status.stdout
# Configure CRI-O to use zot as pull-through cache.
#
# Flow: stage the role's zot-mirror.conf in /tmp, read the copy currently
# installed inside minikube, and only rewrite it (and restart CRI-O via the
# handler) when the two differ. All minikube-facing steps are skipped unless
# minikube reported a running status earlier in this file.
- name: Copy zot mirror config to temp location
  ansible.builtin.copy:
    src: zot-mirror.conf
    dest: /tmp/zot-mirror.conf
    mode: "0644"
  when: minikube_final_status.rc == 0 and 'Running' in minikube_final_status.stdout

# Prints an empty string when the file does not exist yet, so the task never
# fails on a fresh cluster.
- name: Check if zot mirror config exists in minikube
  ansible.builtin.command:
    cmd: minikube ssh "cat /etc/containers/registries.conf.d/zot-mirror.conf 2>/dev/null || echo ''"
  register: minikube_existing_zot_config
  changed_when: false
  when: minikube_final_status.rc == 0 and 'Running' in minikube_final_status.stdout

- name: Read local zot mirror config
  ansible.builtin.slurp:
    src: /tmp/zot-mirror.conf
  register: minikube_local_zot_config
  when: minikube_final_status.rc == 0 and 'Running' in minikube_final_status.stdout

# The comparison normalises both sides: the command module strips trailing
# newlines from stdout while slurp preserves the file's final newline, so a raw
# comparison would report a difference on every run and restart CRI-O each
# time. Carriage returns are dropped as well in case the ssh session emits CRLF.
- name: Apply zot mirror config to minikube
  ansible.builtin.shell:
    cmd: |
      set -o pipefail
      cat /tmp/zot-mirror.conf | minikube ssh "sudo tee /etc/containers/registries.conf.d/zot-mirror.conf > /dev/null"
    executable: /bin/bash
  changed_when: true
  when:
    - minikube_final_status.rc == 0
    - "'Running' in minikube_final_status.stdout"
    - >-
      (minikube_existing_zot_config.stdout | replace('\r', '') | trim)
      != (minikube_local_zot_config.content | b64decode | trim)
  notify: Restart CRI-O in minikube

# Runs unconditionally so the staged copy never lingers in /tmp.
- name: Clean up temp config file
  ansible.builtin.file:
    path: /tmp/zot-mirror.conf
    state: absent

24
argocd/apps/apps.yaml Normal file
View file

@ -0,0 +1,24 @@
# Root "app-of-apps" Application.
# Points at argocd/apps/ so every Application manifest in that directory is
# created and managed by ArgoCD.
#
# Sync behaviour: this root app auto-syncs (with prune and self-heal), so new
# or changed Application manifests appear automatically. The child apps in this
# directory declare no automated policy and still need a manual sync.
#
# NOTE(review): targetRevision is pinned to a feature branch - confirm whether
# it should move to main once the branch is merged.
---
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: apps
  namespace: argocd
spec:
  project: default
  destination:
    server: https://kubernetes.default.svc
    namespace: argocd
  source:
    repoURL: ssh://forgejo@indri.tail8d86e.ts.net:2200/eblume/blumeops.git
    targetRevision: feature/k8s-phase1-kickoff
    path: argocd/apps
  syncPolicy:
    syncOptions:
      - CreateNamespace=true
    automated:
      prune: true
      selfHeal: true

20
argocd/apps/argocd.yaml Normal file
View file

@ -0,0 +1,20 @@
# Self-management Application for ArgoCD.
# Once the cluster is bootstrapped, ArgoCD deploys itself from
# argocd/manifests/argocd.
#
# No automated sync policy is set: syncs are manual and a git push alone does
# not roll anything out.
---
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: argocd
  namespace: argocd
spec:
  project: default
  destination:
    server: https://kubernetes.default.svc
    namespace: argocd
  source:
    repoURL: ssh://forgejo@indri.tail8d86e.ts.net:2200/eblume/blumeops.git
    targetRevision: feature/k8s-phase1-kickoff
    path: argocd/manifests/argocd
  syncPolicy:
    syncOptions:
      - CreateNamespace=true

View file

@ -0,0 +1,24 @@
# Application for the blumeops PostgreSQL cluster.
# Depends on the CloudNativePG operator (the cloudnative-pg app) and on a
# secret that is created by hand.
#
# Create the eblume password secret before the first sync:
#   kubectl create namespace databases
#   op inject -i argocd/manifests/databases/secret-eblume.yaml.tpl | kubectl apply -f -
#
# No automated sync policy is set: syncs are manual and a git push alone does
# not roll anything out.
---
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: blumeops-pg
  namespace: argocd
spec:
  project: default
  destination:
    server: https://kubernetes.default.svc
    namespace: databases
  source:
    repoURL: ssh://forgejo@indri.tail8d86e.ts.net:2200/eblume/blumeops.git
    targetRevision: feature/k8s-phase1-kickoff
    path: argocd/manifests/databases
  syncPolicy:
    syncOptions:
      - CreateNamespace=true

View file

@ -0,0 +1,30 @@
# Application for the CloudNativePG operator (PostgreSQL on Kubernetes).
# Only the operator is installed here; PostgreSQL clusters are defined separately.
#
# Two sources are combined: the upstream Helm chart, plus this repository
# exposed under the `values` ref so the chart can load its values file from git.
#
# No automated sync policy is set: syncs are manual and a git push alone does
# not roll anything out.
---
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: cloudnative-pg
  namespace: argocd
spec:
  project: default
  destination:
    server: https://kubernetes.default.svc
    namespace: cnpg-system
  sources:
    # Upstream Helm chart
    - repoURL: https://cloudnative-pg.github.io/charts
      chart: cloudnative-pg
      targetRevision: "0.23.0"
      helm:
        releaseName: cloudnative-pg
        valueFiles:
          - $values/argocd/manifests/cloudnative-pg/values.yaml
    # Our git repo, referenced above as $values
    - repoURL: ssh://forgejo@indri.tail8d86e.ts.net:2200/eblume/blumeops.git
      targetRevision: feature/k8s-phase1-kickoff
      ref: values
  syncPolicy:
    syncOptions:
      - CreateNamespace=true
      # Needed for large CRDs that exceed the annotation size limit
      - ServerSideApply=true

View file

@ -0,0 +1,26 @@
# Application for the Tailscale Kubernetes Operator.
# The operator's OAuth secret is kept out of git and managed separately.
#
# No automated sync policy is set: syncs are manual and a git push alone does
# not roll anything out.
---
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: tailscale-operator
  namespace: argocd
spec:
  project: default
  destination:
    server: https://kubernetes.default.svc
    namespace: tailscale
  source:
    repoURL: ssh://forgejo@indri.tail8d86e.ts.net:2200/eblume/blumeops.git
    targetRevision: feature/k8s-phase1-kickoff
    path: argocd/manifests/tailscale-operator
  # The operator rewrites Service externalName from "placeholder" to the real
  # proxy service, so that field is excluded from diffing.
  ignoreDifferences:
    - group: ""
      kind: Service
      jsonPointers:
        - /spec/externalName
  syncPolicy:
    syncOptions:
      - CreateNamespace=true

View file

@ -0,0 +1,114 @@
# ArgoCD
GitOps continuous delivery for Kubernetes, with self-management via ArgoCD.
## Prerequisites
- Tailscale operator deployed (see `argocd/manifests/tailscale-operator/README.md`)
- Deploy key added to forge for SSH access to blumeops repo
## Manual Bootstrap
Bootstrap is required when setting up a new cluster. After bootstrap, ArgoCD manages itself.
```bash
# 1. Create namespace
kubectl create namespace argocd
# 2. Apply ArgoCD manifests via kustomize
kubectl apply -k argocd/manifests/argocd/
# 3. Wait for ArgoCD to be ready
kubectl wait --for=condition=available deployment/argocd-server -n argocd --timeout=300s
# 4. Get initial admin password
kubectl -n argocd get secret argocd-initial-admin-secret -o jsonpath="{.data.password}" | base64 -d && echo
# 5. Login and change password
argocd login argocd.tail8d86e.ts.net --username admin --grpc-web
argocd account update-password
# 6. Apply repo-forge secret for SSH access to forge
PRIV_KEY=$(op read "op://vg6xf6vvfmoh5hqjjhlhbeoaie/csjncynh6htjvnh2l2da65y32q/private key?ssh-format=openssh")$'\n' && \
kubectl create secret generic repo-forge -n argocd \
--from-literal=type=git \
--from-literal=url='ssh://forgejo@indri.tail8d86e.ts.net:2200/eblume/blumeops.git' \
--from-literal=insecure=true \
--from-literal=sshPrivateKey="$PRIV_KEY" && \
kubectl label secret repo-forge -n argocd argocd.argoproj.io/secret-type=repository
# 7. Apply ArgoCD Applications (self-management + app-of-apps)
kubectl apply -f argocd/apps/argocd.yaml
kubectl apply -f argocd/apps/apps.yaml
```
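After step 7, ArgoCD manages itself and all applications defined in `argocd/apps/`. The app-of-apps creates each Application automatically, but the child applications use manual sync, so run `argocd app sync <app-name>` to roll out changes.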
## Access
- URL: https://argocd.tail8d86e.ts.net
- Username: `admin`
- Password: Stored in 1Password after initial setup
## ArgoCD CLI Commands
```bash
# Check all applications
argocd app list
# Sync a specific application
argocd app sync <app-name>
# Check application status
argocd app get <app-name>
# Hard refresh (clear git cache)
argocd app get <app-name> --hard-refresh
```
## Adding New Applications
1. Create an Application manifest in `argocd/apps/<app-name>.yaml`
2. Commit and push to forge
3. ArgoCD (via app-of-apps) automatically picks it up
Example Application:
```yaml
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: my-app
namespace: argocd
spec:
project: default
source:
repoURL: ssh://forgejo@indri.tail8d86e.ts.net:2200/eblume/blumeops.git
targetRevision: main
path: argocd/manifests/my-app
destination:
server: https://kubernetes.default.svc
namespace: my-app
syncPolicy:
automated:
prune: true
selfHeal: true
syncOptions:
- CreateNamespace=true
```
## Files
| File | Description |
|------|-------------|
| `kustomization.yaml` | References upstream install.yaml + local customizations |
| `service-tailscale.yaml` | Tailscale Ingress for external access with Let's Encrypt TLS |
| `argocd-cmd-params-cm.yaml` | Patch to disable HTTPS redirect (TLS terminates at Ingress) |
| `repo-forge-secret.yaml.tpl` | Template documenting the forge SSH secret (manual) |
| `README.md` | This file |
## Notes
- **TODO:** Secrets (`repo-forge`) are not managed by ArgoCD and must be applied manually.
Future improvement: integrate with a secrets operator (e.g., External Secrets).
- ArgoCD uses Tailscale Ingress with Let's Encrypt for TLS termination.
- The `--grpc-web` flag is required for CLI access through the Tailscale ingress.

View file

@ -0,0 +1,8 @@
# Parameters for the ArgoCD server.
# TLS is terminated at the Tailscale Ingress, so the server is switched to
# insecure mode, which turns off its HTTPS redirect.
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: argocd-cmd-params-cm
data:
  server.insecure: "true"

View file

@ -0,0 +1,11 @@
# Kustomization for ArgoCD: upstream install manifest plus local additions.
# All resources are placed in the argocd namespace.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: argocd
resources:
  # NOTE(review): tracks the moving `stable` ref - consider pinning a release
  # tag so upgrades are deliberate.
  - https://raw.githubusercontent.com/argoproj/argo-cd/stable/manifests/install.yaml
  - service-tailscale.yaml
# `patches` replaces the deprecated `patchesStrategicMerge` field; the patch
# target is still resolved from the kind/name inside the patch file.
patches:
  - path: argocd-cmd-params-cm.yaml

View file

@ -0,0 +1,27 @@
# Repository secret that gives ArgoCD SSH access to forge.
#
# This file documents the shape of the secret; the key itself comes from
# 1Password. ArgoCD requires OpenSSH key format, so the ?ssh-format=openssh
# query on the op:// reference is mandatory.
#
# To create the secret:
#
#   PRIV_KEY=$(op read "op://vg6xf6vvfmoh5hqjjhlhbeoaie/csjncynh6htjvnh2l2da65y32q/private key?ssh-format=openssh")$'\n' && \
#   kubectl create secret generic repo-forge -n argocd \
#     --from-literal=type=git \
#     --from-literal=url='ssh://forgejo@indri.tail8d86e.ts.net:2200/eblume/blumeops.git' \
#     --from-literal=insecure=true \
#     --from-literal=sshPrivateKey="$PRIV_KEY" && \
#   kubectl label secret repo-forge -n argocd argocd.argoproj.io/secret-type=repository
#
---
apiVersion: v1
kind: Secret
metadata:
  name: repo-forge
  namespace: argocd
  labels:
    argocd.argoproj.io/secret-type: repository
stringData:
  type: git
  url: ssh://forgejo@indri.tail8d86e.ts.net:2200/eblume/blumeops.git
  insecure: "true"
  sshPrivateKey: |
    # Key from 1Password: op://vg6xf6vvfmoh5hqjjhlhbeoaie/csjncynh6htjvnh2l2da65y32q/private key

View file

@ -0,0 +1,23 @@
# Tailscale Ingress in front of ArgoCD.
# Publishes ArgoCD at https://argocd.tail8d86e.ts.net with Let's Encrypt TLS.
#
# An Ingress is used rather than a LoadBalancer so certificates are issued
# automatically.
# See: https://tailscale.com/kb/1439/kubernetes-operator-cluster-ingress
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: argocd-server-tailscale
  namespace: argocd
  annotations:
    tailscale.com/proxy-class: "crio-compat"
spec:
  ingressClassName: tailscale
  tls:
    - hosts:
        - argocd
  defaultBackend:
    service:
      name: argocd-server
      port:
        number: 80

View file

@ -0,0 +1,52 @@
# CloudNativePG Operator
Kubernetes operator for managing PostgreSQL clusters with high availability.
## Source
- Helm chart: `cloudnative-pg` from https://cloudnative-pg.github.io/charts
- Documentation: https://cloudnative-pg.io/documentation/
## Deployment
Managed via ArgoCD Application using Helm source (not kustomize).
The Application points directly to the upstream Helm repository.
## ArgoCD CLI Commands
```bash
# Check application status
argocd app get cloudnative-pg
# Trigger a sync
argocd app sync cloudnative-pg
# View deployment history
argocd app history cloudnative-pg
```
## Verification
```bash
# Check operator pod is running
kubectl get pods -n cnpg-system
# Check operator logs
kubectl logs -n cnpg-system -l app.kubernetes.io/name=cloudnative-pg
# Check CRDs are installed
kubectl get crd | grep cnpg
```
## Files
| File | Description |
|------|-------------|
| `values.yaml` | Helm values for customization |
| `README.md` | This file |
## Notes
- The operator is deployed to `cnpg-system` namespace
- PostgreSQL clusters are created separately using the `Cluster` CRD (see the manifests under `argocd/manifests/databases/`, Step 7)
- No secrets required for the operator itself

View file

@ -0,0 +1,4 @@
# CloudNativePG Helm values
# See: https://github.com/cloudnative-pg/charts/tree/main/charts/cloudnative-pg
# Using defaults for now - customize as needed

View file

@ -0,0 +1,97 @@
# Database Manifests
PostgreSQL clusters managed by CloudNativePG operator.
## blumeops-pg
Single-instance PostgreSQL cluster for blumeops services.
### Configuration
- **Instances**: 1 (single-node for minikube)
- **Storage**: 10Gi on `standard` storage class
- **Initial database**: `miniflux` owned by `miniflux` user
### Users/Roles
| User | Role | Purpose | Password Source |
|----------|-------------|----------------------------------|------------------------------------|
| postgres | superuser | CNPG internal (avoid using) | `blumeops-pg-superuser` secret |
| miniflux | app owner | Owns miniflux database | `blumeops-pg-app` secret |
| eblume | superuser | Admin access (matches brew pg) | `blumeops-pg-eblume` secret (manual) |
### Manual Secret Setup
Before deploying, create the eblume password secret:
```bash
# Create namespace first
kubectl create namespace databases
# Apply eblume password from 1Password
op inject -i argocd/manifests/databases/secret-eblume.yaml.tpl | kubectl apply -f -
```
The `miniflux` user password is auto-generated by CloudNativePG and stored in `blumeops-pg-app`.
### Connection Information
After the cluster is healthy:
```bash
# Connect via Tailscale (temporary hostname during migration)
psql -h k8s-pg.tail8d86e.ts.net -U eblume -W -d miniflux
# Or with password from 1Password
PGPASSWORD=$(op --vault blumeops item get guxu3j7ajhjyey6xxl2ovsl2ui --fields password --reveal) \
psql -h k8s-pg.tail8d86e.ts.net -U eblume -d miniflux
# Get miniflux app credentials (for applications)
kubectl -n databases get secret blumeops-pg-app -o jsonpath='{.data.uri}' | base64 -d
# Get postgres superuser credentials (emergency only)
kubectl -n databases get secret blumeops-pg-superuser -o jsonpath='{.data.password}' | base64 -d
```
### Connecting via kubectl port-forward
Alternative if Tailscale service is unavailable:
```bash
# Terminal 1: Port-forward to the primary
kubectl -n databases port-forward svc/blumeops-pg-rw 5432:5432
# Terminal 2: Connect as eblume
PGPASSWORD=$(op --vault blumeops item get guxu3j7ajhjyey6xxl2ovsl2ui --fields password --reveal) \
psql -h localhost -U eblume -d miniflux
```
### Status
```bash
# Check cluster health
kubectl -n databases get cluster blumeops-pg
# Check pods
kubectl -n databases get pods -l cnpg.io/cluster=blumeops-pg
# Check managed roles status
kubectl -n databases get cluster blumeops-pg -o jsonpath='{.status.managedRolesStatus}' | jq
# PostgreSQL instance pod logs (the operator itself logs in the cnpg-system namespace)
kubectl -n databases logs -l cnpg.io/cluster=blumeops-pg
```
## Tailscale Exposure
### Current: Temporary Service
`k8s-pg.tail8d86e.ts.net` - LoadBalancer service for testing during migration.
### Phase 4: Production Service
After miniflux migrates to k8s, the `pg.tail8d86e.ts.net` Tailscale service will switch
from brew PostgreSQL (indri) to this k8s cluster. At that point:
1. Delete `service-tailscale.yaml` (the `k8s-pg` service)
2. Update/create a service with `tailscale.com/hostname: "pg"`
3. Verify the orphaned `k8s-pg` device is removed from tailnet

View file

@ -0,0 +1,52 @@
# CloudNativePG Cluster backing blumeops services.
# Single instance with 10Gi on the `standard` storage class.
---
apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
  name: blumeops-pg
  namespace: databases
spec:
  instances: 1

  storage:
    storageClass: standard
    size: 10Gi

  # Sized for the minikube environment
  resources:
    requests:
      cpu: "100m"
      memory: "256Mi"
    limits:
      cpu: "500m"
      memory: "1Gi"

  # initdb bootstrap: creates the first database together with its owner
  bootstrap:
    initdb:
      database: miniflux
      owner: miniflux

  # Roles managed in addition to the bootstrap owner
  managed:
    roles:
      # Admin superuser, mirroring the existing brew pg setup
      - name: eblume
        login: true
        superuser: true
        createdb: true
        createrole: true
        passwordSecret:
          name: blumeops-pg-eblume

  # Server settings
  postgresql:
    parameters:
      max_connections: "50"
      shared_buffers: "128MB"
      password_encryption: "scram-sha-256"
    pg_hba:
      # Password (scram) auth for every user from any address, IPv4 and IPv6.
      # Network-level security is delegated to Tailscale.
      - host all all 0.0.0.0/0 scram-sha-256
      - host all all ::/0 scram-sha-256

View file

@ -0,0 +1,8 @@
# Kustomization for the databases namespace: the PostgreSQL cluster and its
# Tailscale service.
---
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: databases
resources:
  - blumeops-pg.yaml
  - service-tailscale.yaml

View file

@ -0,0 +1,13 @@
# Template for eblume superuser password
# Apply with: op inject -i secret-eblume.yaml.tpl | kubectl apply -f -
#
# Uses the same 1Password item as the brew PostgreSQL setup on indri
#
# The injected password is quoted so the rendered manifest stays a YAML string
# even when the value looks like a number or boolean, or contains characters
# such as `#` or `: `. This matches the quoting in the tailscale-operator
# secret template. A password containing `"` or `\` would still need escaping.
apiVersion: v1
kind: Secret
metadata:
  name: blumeops-pg-eblume
  namespace: databases
type: kubernetes.io/basic-auth
stringData:
  username: eblume
  password: "{{ op://vg6xf6vvfmoh5hqjjhlhbeoaie/guxu3j7ajhjyey6xxl2ovsl2ui/password }}"

View file

@ -0,0 +1,22 @@
# Tailscale LoadBalancer for PostgreSQL access
# Temporary service for testing during migration (k8s-pg.tail8d86e.ts.net)
# Will be replaced by pg.tail8d86e.ts.net in Phase 4
apiVersion: v1
kind: Service
metadata:
  name: blumeops-pg-tailscale
  namespace: databases
  annotations:
    tailscale.com/hostname: "k8s-pg"
    tailscale.com/proxy-class: "crio-compat"
spec:
  type: LoadBalancer
  loadBalancerClass: tailscale
  # Route only to the current primary of the blumeops-pg cluster.
  # `cnpg.io/instanceRole` replaces the bare `role` label, which CloudNativePG
  # has deprecated.
  selector:
    cnpg.io/cluster: blumeops-pg
    cnpg.io/instanceRole: primary
  ports:
    - name: postgresql
      port: 5432
      targetPort: 5432
      protocol: TCP

View file

@ -0,0 +1,90 @@
# Tailscale Kubernetes Operator
Manifests for the Tailscale Kubernetes Operator, managed via ArgoCD.
## Source
- `operator.yaml` - Static manifest from https://github.com/tailscale/tailscale/tree/main/cmd/k8s-operator/deploy/manifests
- Secret block removed from `operator.yaml` - managed separately via `secret.yaml.tpl`
- Image reference changed to fully-qualified `docker.io/tailscale/k8s-operator:stable` for CRI-O compatibility
## Prerequisites
1. OAuth client in Tailscale admin console with:
- Devices: Core (Read & Write) - tag: `tag:k8s-operator`
- Auth Keys: Read & Write
- Services: Write
2. ACL with `tag:k8s-operator` owning `tag:k8s` (so operator can tag resources it creates)
## Manual Bootstrap (Before ArgoCD)
Tailscale operator must be deployed before ArgoCD since ArgoCD uses Tailscale for ingress.
```bash
# 1. Create namespace
kubectl create namespace tailscale
# 2. Apply OAuth secret (uses 1Password)
op inject -i argocd/manifests/tailscale-operator/secret.yaml.tpl | kubectl apply -f -
# 3. Apply manifests via kustomize
kubectl apply -k argocd/manifests/tailscale-operator/
```
## Ongoing Management (After ArgoCD)
Once ArgoCD is running, the operator is managed by the `tailscale-operator` ArgoCD Application.
ArgoCD pulls manifests from forge and applies them when the application is synced (this app uses manual sync; see the commands below).
## ArgoCD CLI Commands
```bash
# Check application status
argocd app get tailscale-operator
# Trigger a sync (pull latest from forge and apply)
argocd app sync tailscale-operator
# Preview what would change without applying
argocd app diff tailscale-operator
# View deployment history
argocd app history tailscale-operator
# Hard refresh (clear cache and re-fetch from git)
argocd app get tailscale-operator --hard-refresh
```
## Verification
```bash
# Check operator pod is running
kubectl get pods -n tailscale
# Check operator logs
kubectl logs -n tailscale -l app.kubernetes.io/name=operator
```
## Files
| File | Description |
|------|-------------|
| `kustomization.yaml` | Kustomize configuration for all manifests |
| `operator.yaml` | Operator deployment, CRDs, RBAC (secret removed) |
| `proxyclass.yaml` | ProxyClass with fully-qualified images for CRI-O |
| `dnsconfig.yaml` | DNSConfig for cluster-to-tailnet name resolution |
| `egress-forge.yaml` | Egress proxy for accessing forge on indri |
| `secret.yaml.tpl` | 1Password template for OAuth credentials (manual) |
| `README.md` | This file |
## Notes
- **TODO:** The OAuth secret (`operator-oauth`) is not managed by ArgoCD and must be applied
manually. Future improvement: integrate with a secrets operator (e.g., External Secrets).
- Services using the Tailscale LoadBalancer must reference the ProxyClass:
```yaml
annotations:
tailscale.com/proxy-class: "crio-compat"
```
- The egress proxy for forge targets `indri.tail8d86e.ts.net` directly (not `forge.tail8d86e.ts.net`)
because Tailscale Serve hostnames are virtual and only work via the Tailscale client.

View file

@ -0,0 +1,16 @@
# DNSConfig: lets workloads inside the cluster resolve MagicDNS names.
# It deploys a nameserver that answers ts.net names with egress proxy IPs.
#
# CoreDNS/kube-dns must also be configured to forward ts.net queries to it.
# See: https://tailscale.com/kb/1438/kubernetes-operator-cluster-egress
---
apiVersion: tailscale.com/v1alpha1
kind: DNSConfig
metadata:
  name: ts-dns
  namespace: tailscale
spec:
  nameserver:
    image:
      # Fully-qualified repo name
      repo: docker.io/tailscale/k8s-nameserver
      tag: stable

View file

@ -0,0 +1,20 @@
# Egress proxy that makes Forgejo (forge) reachable from inside the cluster.
# Forge listens on indri:3001 and is published through Tailscale Serve as
# forge.tail8d86e.ts.net. Egress cannot reach Tailscale Serve hostnames, so the
# proxy targets indri itself.
#
# See: https://tailscale.com/kb/1438/kubernetes-operator-cluster-egress
---
apiVersion: v1
kind: Service
metadata:
  name: forge
  namespace: tailscale
  annotations:
    tailscale.com/proxy-class: "crio-compat"
    tailscale.com/tailnet-fqdn: indri.tail8d86e.ts.net
spec:
  type: ExternalName
  # Placeholder value; the Tailscale operator rewrites it to the proxy service.
  externalName: placeholder
  ports:
    - port: 3001
      targetPort: 3001

View file

@ -0,0 +1,13 @@
# Kustomization for the Tailscale operator and its supporting resources.
#
# The OAuth secret (operator-oauth) is deliberately NOT listed here; it has to
# be applied by hand before deploying - see README.md.
---
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: tailscale
resources:
  - operator.yaml
  - proxyclass.yaml
  - dnsconfig.yaml
  - egress-forge.yaml

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,29 @@
# ProxyClass: crio-compat
#
# Background:
#   minikube runs CRI-O as its container runtime, and CRI-O cannot resolve
#   short image names such as "tailscale/tailscale:stable". It needs a
#   fully-qualified name with an explicit registry prefix, for example
#   "docker.io/tailscale/tailscale:stable".
#
#   For every LoadBalancer Service or Ingress the Tailscale operator creates a
#   proxy pod (a StatefulSet). Those pods default to short image names, which
#   fail on CRI-O with "ImageInspectError".
#
# How to use it:
#   Annotate any Tailscale Service or Ingress with:
#     tailscale.com/proxy-class: "crio-compat"
#
#   The operator then builds that resource's proxy pod from the
#   fully-qualified images listed below.
---
apiVersion: tailscale.com/v1alpha1
kind: ProxyClass
metadata:
  name: crio-compat
spec:
  statefulSet:
    pod:
      tailscaleInitContainer:
        image: docker.io/tailscale/tailscale:stable
      tailscaleContainer:
        image: docker.io/tailscale/tailscale:stable

View file

@ -0,0 +1,14 @@
# OAuth credentials for the Tailscale operator.
# This is a template: `op inject` resolves the 1Password references below.
#
# How to apply:
#   op inject -i secret.yaml.tpl | kubectl apply -f -
#
---
apiVersion: v1
kind: Secret
metadata:
  name: operator-oauth
  namespace: tailscale
stringData:
  client_id: "{{ op://vg6xf6vvfmoh5hqjjhlhbeoaie/2it22lavwgbxdskoaxanej354q/client-id }}"
  client_secret: "{{ op://vg6xf6vvfmoh5hqjjhlhbeoaie/2it22lavwgbxdskoaxanej354q/client-secret }}"

View file

@ -83,6 +83,14 @@ check_service "minikube" "ssh indri 'minikube status --format={{.Host}} | grep -
check_service "k8s-apiserver (indri)" "ssh indri 'kubectl get --raw /healthz'" check_service "k8s-apiserver (indri)" "ssh indri 'kubectl get --raw /healthz'"
check_service "k8s-apiserver (remote)" "kubectl --kubeconfig=$HOME/.kube/minikube-indri/config.yml --context=minikube-indri get --raw /healthz" check_service "k8s-apiserver (remote)" "kubectl --kubeconfig=$HOME/.kube/minikube-indri/config.yml --context=minikube-indri get --raw /healthz"
echo ""
echo "Kubernetes workloads (via Tailscale):"
check_http "ArgoCD" "https://argocd.tail8d86e.ts.net/healthz"
# k8s PostgreSQL - check TCP connection (no auth needed for pg_isready)
check_service "k8s-pg" "pg_isready -h k8s-pg.tail8d86e.ts.net -p 5432"
# ArgoCD apps sync status: the jsonpath prints every app's sync status on one
# line, and `grep -v OutOfSync` fails when that line contains OutOfSync (or is
# empty). Uses the same dedicated kubeconfig as the remote k8s-apiserver check
# above, so the minikube-indri context resolves even when it is not present in
# the default kubeconfig.
check_service "ArgoCD apps synced" "kubectl --kubeconfig=$HOME/.kube/minikube-indri/config.yml --context=minikube-indri get applications -n argocd -o jsonpath='{.items[*].status.sync.status}' | grep -v OutOfSync"
echo "" echo ""
if [ $FAILED -eq 0 ]; then if [ $FAILED -eq 0 ]; then
echo -e "${GREEN}All services healthy!${NC}" echo -e "${GREEN}All services healthy!${NC}"

View file

@ -0,0 +1,149 @@
# Blumeops Minikube Migration Plan
This plan details a phased migration of blumeops services from direct hosting on indri (Mac Mini M1) to a minikube cluster, while maintaining critical infrastructure services outside of Kubernetes.
## Phases
| Phase | Name | Status | Description |
|-------|------|--------|-------------|
| 0 | [Foundation](P0_foundation.complete.md) | Complete | Container registry + minikube cluster |
| 1 | [K8s Infrastructure](P1_k8s_infrastructure.md) | In Progress | Tailscale operator, ArgoCD, CloudNativePG, PostgreSQL cluster |
| 2 | [Grafana](P2_grafana.md) | Pending | Migrate Grafana (pilot) via ArgoCD |
| 3 | [PostgreSQL](P3_postgresql.md) | Pending | Data migration to k8s PostgreSQL |
| 4 | [Miniflux](P4_miniflux.md) | Pending | Migrate Miniflux via ArgoCD |
| 5 | [devpi](P5_devpi.md) | Pending | Migrate devpi via ArgoCD |
| 6 | [Kiwix](P6_kiwix.md) | Pending | Migrate Kiwix via ArgoCD |
| 7 | [Forgejo](P7_forgejo.md) | Pending | Migrate Forgejo (highest risk) via ArgoCD |
| 8 | [Woodpecker](P8_woodpecker.md) | Pending | Deploy CI/CD via ArgoCD |
| 9 | [Cleanup](P9_cleanup.md) | Pending | Remove deprecated services |
## Architecture Overview
### Services Staying on Indri (Outside K8s)
| Service | Reason |
|---------|--------|
| **Zot Registry** (NEW) | Avoid circular dependency - k8s needs images to start |
| **Prometheus** | Observability backbone must survive k8s failures |
| **Loki** | Log aggregation backbone |
| **Borgmatic** | Backup system |
| **Grafana-alloy** | Metrics/logs collector on host |
| **Plex** | Until Jellyfin replacement |
| **Transmission** | Downloads for kiwix ZIM files |
### Services Moving to K8s
| Service | Complexity | Dependencies |
|---------|------------|--------------|
| Grafana | LOW | Phase 1 |
| Kiwix | LOW | Phase 1 |
| Miniflux | MEDIUM | PostgreSQL |
| devpi | MEDIUM | Registry |
| PostgreSQL | HIGH | Phase 1 |
| Forgejo | HIGH | PostgreSQL |
| Woodpecker CI | MEDIUM | Forgejo |
## Technical Decisions
### Container Registry: Zot
- OCI-native, lightweight
- Native support for proxying multiple registries (Docker Hub, GHCR, Quay)
- Built from source at `~/code/3rd/zot` (not in homebrew)
- Binary: `~/code/3rd/zot/bin/zot-darwin-arm64`
- Config: `~/.config/zot/config.json`
- Data: `~/zot/`
### Minikube Driver: Podman
- Rootless containers for better security
- Lighter than full VM (QEMU)
- Uses existing container ecosystem
- `minikube start --driver=podman --container-runtime=cri-o`
### PostgreSQL: CloudNativePG Operator
- Production-grade operator
- Built-in backup/restore
- Prometheus metrics
- PITR support
### K8s Service Exposure: Tailscale Operator
- `loadBalancerClass: tailscale` on Services
- Automatic TLS and MagicDNS names
- ACL-controlled access
### LaunchAgent Requirements (Critical)
LaunchAgents do NOT get homebrew on PATH. All commands must use **absolute paths**:
- `/Users/erichblume/code/3rd/zot/bin/zot-darwin-arm64` for zot (built from source)
- `/opt/homebrew/opt/mise/bin/mise x --` for mise-managed tools
- `/opt/homebrew/opt/postgresql@18/bin/pg_dump` for postgres tools
This applies to all mcquack LaunchAgents (zot, devpi, kiwix, borgmatic, metrics collectors).
`brew services` handles this automatically but those aren't tracked in ansible.
### Backup Strategy
Borgmatic remains on indri (outside k8s), writing to sifaka NAS via SMB at `/Volumes/backups`. This ensures backups continue even if k8s is down.
| Service | Backup Approach |
|---------|-----------------|
| **Zot Registry** | No backup needed - pull-through cache is re-fetchable, private images rebuilt from source control |
| **Minikube** | No backup of cluster state - declarative manifests in git, can recreate |
| **PostgreSQL (k8s)** | CloudNativePG scheduled backups to sifaka (Phase 1) |
| **Grafana (k8s)** | Dashboards in ansible source control, no runtime backup needed |
| **Miniflux (k8s)** | Database backed up via CloudNativePG |
| **Forgejo (k8s)** | Git repos are distributed, config in ansible; data dir backed up via borgmatic before migration |
| **devpi (k8s)** | Private packages backed up, PyPI cache re-fetchable |
| **Kiwix (k8s)** | ZIM files re-downloadable via torrent, no backup needed |
**Borgmatic config changes:** None required for Phase 0. Future phases may add k8s PV paths if needed.
---
## Critical Files
| File | Purpose |
|------|---------|
| `ansible/playbooks/indri.yml` | Main playbook - add k8s roles, remove migrated services |
| `ansible/roles/tailscale_serve/defaults/main.yml` | Transition services to Tailscale operator |
| `pulumi/policy.hujson` | Add tags: k8s, registry, ci |
| `ansible/roles/borgmatic/defaults/main.yml` | Update PostgreSQL endpoint |
| `mise-tasks/indri-services-check` | Add k8s health checks |
## New Directory Structure
```
ansible/
k8s/
operators/
tailscale-operator.yaml
cloudnative-pg.yaml
databases/
blumeops-pg.yaml
apps/
grafana/
miniflux/
forgejo/
devpi/
kiwix/
woodpecker/
roles/
zot/ # NEW
podman/ # NEW
minikube/ # NEW
```
## Risk Mitigation
- **Circular dependency prevention**: Zot registry runs outside k8s
- **Observability**: Prometheus/Loki stay on indri
- **Data loss prevention**: borgmatic + manual backups before each phase
- **Recovery**: Can manually push images, restore from backups
## Container Images (All ARM64)
| Service | Image |
|---------|-------|
| Miniflux | `ghcr.io/miniflux/miniflux:latest` |
| Forgejo | `codeberg.org/forgejo/forgejo:10` |
| Grafana | `grafana/grafana:latest` |
| Kiwix | `ghcr.io/kiwix/kiwix-serve:3.8.1` |
| Woodpecker | `woodpeckerci/woodpecker-server` |
Note: Zot runs as a native binary on indri (built from source at `~/code/3rd/zot`), not as a container.

View file

@ -0,0 +1,657 @@
# Phase 1: Kubernetes Infrastructure
**Goal**: Tailscale operator, ArgoCD, CloudNativePG operator, PostgreSQL cluster
**Status**: In Progress
**Prerequisites**: [Phase 0](P0_foundation.complete.md) complete
---
## Overview
Phase 1 establishes the k8s control plane infrastructure:
1. **Tailscale operator** - Exposes services on the tailnet
2. **ArgoCD** - GitOps continuous delivery
3. **CloudNativePG** - PostgreSQL operator
4. **PostgreSQL cluster** - Database for future app migrations
The deployment follows a bootstrap pattern:
- First two components deployed via `kubectl apply -k` (no GitOps yet)
- ArgoCD then takes over management of all components including itself
- All subsequent deployments use ArgoCD
---
## Kubernetes Tags Overview
| Tag | Purpose | Applied To |
|-----|---------|------------|
| `tag:k8s-api` | Controls access to the K8s API server | indri (Phase 0.14) |
| `tag:k8s-operator` | Identifies the Tailscale K8s Operator | OAuth client for operator |
| `tag:k8s` | Default tag for operator-managed resources | Proxies, services, ingresses created by operator |
**Ownership chain**: `tag:k8s-operator` must own `tag:k8s` so the operator can assign that tag to devices it creates.
---
## PostgreSQL Migration Strategy
The k8s PostgreSQL cluster will eventually replace the brew PostgreSQL on indri.
| Phase | `pg.tail8d86e.ts.net` points to | Miniflux connects to |
|-------|--------------------------------|---------------------|
| Current | brew PostgreSQL (indri) | `pg.tail8d86e.ts.net` |
| Phase 1 | brew PostgreSQL (indri) | `pg.tail8d86e.ts.net` (no change) |
| Phase 4 | brew PostgreSQL (indri) | k8s PG (internal, after miniflux migrates to k8s) |
| Post-Phase 4 | k8s PostgreSQL | k8s PG (internal) |
| Cleanup | k8s PostgreSQL | k8s PG (internal) |
This allows zero-downtime migration - the Tailscale service switches after apps are migrated.
---
## Steps
### 1. Update Pulumi ACLs for k8s workloads ✓
**Status**: Complete
Added to `pulumi/policy.hujson`:
- `tag:k8s-operator` - for the operator OAuth client
- `tag:k8s` - for operator-managed resources (owned by `tag:k8s-operator`)
- Grant for `tag:k8s` → `tag:registry` access
---
### 2. Create Tailscale OAuth client ✓
**Status**: Complete
OAuth client stored in 1Password (vault: `vg6xf6vvfmoh5hqjjhlhbeoaie`, item: `2it22lavwgbxdskoaxanej354q`)
**Configuration used:**
- Tags: `tag:k8s-operator`
- Devices write scope tag: `tag:k8s`
- Scopes: Devices Core (R/W), Auth Keys (R/W), Services (Write)
---
### 3. Deploy Tailscale Kubernetes Operator (Bootstrap)
Deploy via `kubectl apply -k` - will be migrated to ArgoCD management in Step 5.
**Setup manifests directory:**
```bash
mkdir -p argocd/manifests/tailscale-operator/crds
cd argocd/manifests/tailscale-operator
# Download static manifest from Tailscale repo
curl -sL https://raw.githubusercontent.com/tailscale/tailscale/main/cmd/k8s-operator/deploy/manifests/operator.yaml -o operator.yaml
# Download CRDs
curl -sL https://raw.githubusercontent.com/tailscale/tailscale/main/cmd/k8s-operator/deploy/crds/tailscale.com_connectors.yaml -o crds/connectors.yaml
curl -sL https://raw.githubusercontent.com/tailscale/tailscale/main/cmd/k8s-operator/deploy/crds/tailscale.com_proxyclasses.yaml -o crds/proxyclasses.yaml
# ... (other CRDs as needed)
```
**Create kustomization.yaml:**
```yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: tailscale-system
resources:
- operator.yaml
secretGenerator:
- name: operator-oauth
namespace: tailscale-system
literals:
- client_id=PLACEHOLDER
- client_secret=PLACEHOLDER
generatorOptions:
disableNameSuffixHash: true
```
**Deploy:**
```bash
# Get credentials from 1Password and create secret manually (kustomize secretGenerator is for reference)
CLIENT_ID=$(op --vault vg6xf6vvfmoh5hqjjhlhbeoaie item get 2it22lavwgbxdskoaxanej354q --fields client-id --reveal)
CLIENT_SECRET=$(op --vault vg6xf6vvfmoh5hqjjhlhbeoaie item get 2it22lavwgbxdskoaxanej354q --fields client-secret --reveal)
kubectl create namespace tailscale-system
kubectl create secret generic operator-oauth \
--namespace tailscale-system \
--from-literal=client_id=$CLIENT_ID \
--from-literal=client_secret=$CLIENT_SECRET
# Apply operator manifests
kubectl apply -k argocd/manifests/tailscale-operator/
```
**Verification:**
```bash
kubectl get pods -n tailscale-system
# Expected: operator pod Running
kubectl logs -n tailscale-system -l app.kubernetes.io/name=tailscale-operator
```
---
### 4. Deploy ArgoCD
Deploy ArgoCD and expose via Tailscale as `argocd.tail8d86e.ts.net`.
**Prerequisites:**
- Add `tag:argocd` to Pulumi ACLs
- Create Tailscale service `argocd` in admin console
**Setup manifests:**
```bash
mkdir -p argocd/manifests/argocd
# Download ArgoCD install manifest
curl -sL https://raw.githubusercontent.com/argoproj/argo-cd/stable/manifests/install.yaml -o argocd/manifests/argocd/install.yaml
```
**Create kustomization.yaml:**
```yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: argocd
resources:
- install.yaml
- service-tailscale.yaml # LoadBalancer for Tailscale exposure
```
**Create service-tailscale.yaml:**
```yaml
apiVersion: v1
kind: Service
metadata:
name: argocd-server-tailscale
namespace: argocd
annotations:
tailscale.com/hostname: "argocd"
spec:
type: LoadBalancer
loadBalancerClass: tailscale
selector:
app.kubernetes.io/name: argocd-server
ports:
- name: https
port: 443
targetPort: 8080
```
**Deploy:**
```bash
kubectl create namespace argocd
kubectl apply -k argocd/manifests/argocd/
```
**Get initial admin password:**
```bash
kubectl -n argocd get secret argocd-initial-admin-secret -o jsonpath="{.data.password}" | base64 -d
```
**Verification:**
- https://argocd.tail8d86e.ts.net loads
- Can log in with `admin` / `<initial-password>`
**Post-setup:**
1. Change admin password, store in 1Password
2. Configure git repo connection to `github.com/eblume/blumeops` (public, no auth needed)
- Note: Using GitHub mirror since ArgoCD can't easily reach forge without additional networking
---
### 5. Migrate Tailscale Operator to ArgoCD
Create ArgoCD Application to manage the Tailscale operator.
**Create argocd/apps/tailscale-operator.yaml:**
```yaml
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: tailscale-operator
namespace: argocd
spec:
project: default
source:
repoURL: https://github.com/eblume/blumeops.git
targetRevision: main
path: argocd/manifests/tailscale-operator
destination:
server: https://kubernetes.default.svc
namespace: tailscale-system
syncPolicy:
automated:
prune: true
selfHeal: true
```
**Apply:**
```bash
kubectl apply -f argocd/apps/tailscale-operator.yaml
```
**Note on secrets:** The OAuth secret was created manually in Step 3. For GitOps, consider:
- Sealed Secrets
- External Secrets Operator
- SOPS
For now, the secret remains manually managed outside of ArgoCD.
---
### 6. Deploy CloudNativePG via ArgoCD
**Setup manifests:**
```bash
mkdir -p argocd/manifests/cloudnative-pg
# Download CNPG operator manifest
curl -sL https://raw.githubusercontent.com/cloudnative-pg/cloudnative-pg/release-1.24/releases/cnpg-1.24.0.yaml -o argocd/manifests/cloudnative-pg/operator.yaml
```
**Create kustomization.yaml:**
```yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- operator.yaml
```
**Create ArgoCD Application (argocd/apps/cloudnative-pg.yaml):**
```yaml
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: cloudnative-pg
namespace: argocd
spec:
project: default
source:
repoURL: https://github.com/eblume/blumeops.git
targetRevision: main
path: argocd/manifests/cloudnative-pg
destination:
server: https://kubernetes.default.svc
namespace: cnpg-system
syncPolicy:
automated:
prune: true
selfHeal: true
syncOptions:
- CreateNamespace=true
```
**Apply:**
```bash
kubectl apply -f argocd/apps/cloudnative-pg.yaml
```
**Verification:**
```bash
kubectl get pods -n cnpg-system
# Expected: cnpg-controller-manager Running
```
---
### 7. Create PostgreSQL Cluster via ArgoCD
Create the database cluster. **Not exposed via Tailscale yet** - internal only until apps migrate.
**Create argocd/manifests/databases/blumeops-pg.yaml:**
```yaml
apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
name: blumeops-pg
namespace: databases
spec:
instances: 1
storage:
size: 10Gi
storageClass: standard
monitoring:
enablePodMonitor: true
bootstrap:
initdb:
database: miniflux
owner: miniflux
```
**Create kustomization.yaml:**
```yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: databases
resources:
- blumeops-pg.yaml
```
**Create ArgoCD Application (argocd/apps/blumeops-pg.yaml):**
```yaml
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: blumeops-pg
namespace: argocd
spec:
project: default
source:
repoURL: https://github.com/eblume/blumeops.git
targetRevision: main
path: argocd/manifests/databases
destination:
server: https://kubernetes.default.svc
namespace: databases
syncPolicy:
automated:
prune: true
selfHeal: true
syncOptions:
- CreateNamespace=true
```
**Apply:**
```bash
kubectl apply -f argocd/apps/blumeops-pg.yaml
```
**Verification:**
```bash
kubectl get cluster -n databases
# Expected: blumeops-pg with STATUS "Cluster in healthy state"
kubectl get pods -n databases
# Expected: blumeops-pg-1 Running
# Get connection secret
kubectl -n databases get secret blumeops-pg-app -o jsonpath='{.data.uri}' | base64 -d
```
---
### 8. Create App-of-Apps Root Application
Once all components are deployed, create a root application to manage all apps.
**Create argocd/apps/root.yaml:**
```yaml
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: root
namespace: argocd
spec:
project: default
source:
repoURL: https://github.com/eblume/blumeops.git
targetRevision: main
path: argocd/apps
destination:
server: https://kubernetes.default.svc
namespace: argocd
syncPolicy:
automated:
prune: true
selfHeal: true
```
**Apply:**
```bash
kubectl apply -f argocd/apps/root.yaml
```
Now ArgoCD manages itself and all other applications via the app-of-apps pattern.
---
## New Files Summary
```
argocd/
apps/
root.yaml # App-of-apps root
tailscale-operator.yaml # Tailscale operator app
cloudnative-pg.yaml # CNPG operator app
blumeops-pg.yaml # PostgreSQL cluster app
manifests/
tailscale-operator/
kustomization.yaml
operator.yaml
argocd/
kustomization.yaml
install.yaml
service-tailscale.yaml
cloudnative-pg/
kustomization.yaml
operator.yaml
databases/
kustomization.yaml
blumeops-pg.yaml
```
---
## Pulumi ACL Updates Required
Add to `pulumi/policy.hujson`:
```hujson
"tag:argocd": ["autogroup:admin", "tag:blumeops"],
```
Add to Erich's test accept list:
```hujson
"accept": [..., "tag:argocd:443"],
```
Add to Allison's deny list:
```hujson
"deny": [..., "tag:argocd:443"],
```
---
## Verification Checklist
```bash
# 1. Tailscale operator running
kubectl get pods -n tailscale-system
# 2. ArgoCD accessible
curl -k https://argocd.tail8d86e.ts.net/healthz
# 3. CloudNativePG operator running
kubectl get pods -n cnpg-system
# 4. PostgreSQL cluster healthy
kubectl get cluster -n databases
# 5. All ArgoCD apps synced
kubectl get applications -n argocd
# All should show STATUS: Synced, HEALTH: Healthy
```
---
## Rollback
```bash
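# Remove ArgoCD apps (managed resources are cascade-deleted only if the Application carries the resources-finalizer.argocd.argoproj.io finalizer; otherwise only the Application objects are removed)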
kubectl delete application -n argocd root
kubectl delete application -n argocd blumeops-pg
kubectl delete application -n argocd cloudnative-pg
kubectl delete application -n argocd tailscale-operator
# Remove ArgoCD
kubectl delete -k argocd/manifests/argocd/
kubectl delete namespace argocd
# Remove namespaces
kubectl delete namespace databases
kubectl delete namespace cnpg-system
kubectl delete namespace tailscale-system
# Revert ACL changes
git checkout pulumi/policy.hujson
mise run tailnet-up
```
---
## Implementation Notes (Deviations from Plan)
*Added during implementation for retrospective review*
### Git Source: Forge Instead of GitHub
**Plan**: Use GitHub mirror (`github.com/eblume/blumeops`)
**Actual**: Use internal Forgejo (`ssh://forgejo@indri.tail8d86e.ts.net:2200/eblume/blumeops.git`)
**Why**: User preference to use internal infrastructure, accepting a circular dependency that will be dealt with later.
**Required changes**:
- Deploy key added to forge for ArgoCD SSH access
- Repository secret `repo-forge` with SSH private key from 1Password
- Discovered: `op read` requires `?ssh-format=openssh` query parameter for ArgoCD-compatible key format
- Egress proxy service to reach forge from cluster (targets `indri.tail8d86e.ts.net` not `forge.tail8d86e.ts.net` due to Tailscale Serve limitation)
- DNSConfig CRD for cluster-to-tailnet MagicDNS resolution
- ACL grant: `tag:k8s` → `tag:homelab` on ports 3001 (HTTP) and 2200 (SSH)
### ArgoCD Exposure: Ingress Instead of LoadBalancer
**Plan**: LoadBalancer service with `tailscale.com/hostname` annotation
**Actual**: Tailscale Ingress with Let's Encrypt TLS termination
**Why**: Ingress provides automatic TLS certificates and is the recommended approach.
**File**: `argocd/manifests/argocd/service-tailscale.yaml` uses `kind: Ingress` with `ingressClassName: tailscale`
### Namespace: `tailscale` Instead of `tailscale-system`
**Plan**: `tailscale-system` namespace
**Actual**: `tailscale` namespace
**Why**: Matches upstream Tailscale operator defaults.
### Sync Policy: Manual Instead of Automated
**Plan**: `syncPolicy.automated` with prune and selfHeal
**Actual**: Manual sync policy for workload apps; auto-sync only for app-of-apps
**Why**: User preference for explicit control over deployments during initial migration phase.
**Pattern**:
- `apps.yaml` (app-of-apps): auto-sync to pick up new Application manifests
- All workload apps: manual sync, triggered with `argocd app sync <name>`
### CloudNativePG: Helm Chart Instead of Raw Manifest
**Plan**: Download raw CNPG manifest
**Actual**: Multi-source Application using official Helm chart from `https://cloudnative-pg.github.io/charts`
**Why**: Helm chart is the officially supported distribution method.
**Additional fix**: Required `ServerSideApply=true` sync option due to large CRD exceeding annotation size limit.
### App-of-Apps: Named `apps` Instead of `root`
**Plan**: `argocd/apps/root.yaml`
**Actual**: `argocd/apps/apps.yaml` with Application named `apps`
**Why**: Clearer naming; `apps` manages apps, `argocd` manages itself.
### ArgoCD Self-Management Added
**Plan**: Not explicitly planned
**Actual**: `argocd/apps/argocd.yaml` Application for ArgoCD self-management
**Why**: Standard GitOps pattern - ArgoCD manages its own deployment after bootstrap.
### CRI-O Registry Mirror for Zot
**Plan**: Not in original plan
**Actual**: Configured CRI-O to use zot as pull-through cache for docker.io, ghcr.io, quay.io
**Why**: Reduces external bandwidth, speeds up pulls, avoids rate limits.
**Implementation**: Ansible `minikube` role applies `/etc/containers/registries.conf.d/zot-mirror.conf` inside minikube VM using stable hostname `host.containers.internal:5050`.
### ProxyClass for CRI-O Image Compatibility
**Plan**: Not mentioned
**Actual**: Required `ProxyClass` with fully-qualified image paths (`docker.io/tailscale/...`)
**Why**: CRI-O requires fully-qualified image references; default Tailscale operator uses short names.
### Actual File Structure
```
argocd/
apps/
apps.yaml # App-of-apps (auto-sync)
argocd.yaml # ArgoCD self-management (manual sync)
tailscale-operator.yaml # Tailscale operator (manual sync)
cloudnative-pg.yaml # CNPG operator via Helm (manual sync)
manifests/
tailscale-operator/
kustomization.yaml
operator.yaml
proxyclass.yaml # CRI-O compatibility
dnsconfig.yaml # Cluster-to-tailnet DNS
egress-forge.yaml # Egress proxy for forge
secret.yaml.tpl # OAuth secret template (manual)
README.md
argocd/
kustomization.yaml # Uses remote base from upstream
service-tailscale.yaml # Ingress (not LoadBalancer)
argocd-cmd-params-cm.yaml # Disable HTTPS redirect
repo-forge-secret.yaml.tpl # SSH key template (manual)
README.md
cloudnative-pg/
values.yaml # Helm values (currently minimal)
README.md
```
### Bootstrap Commands (Actual)
```bash
# 1. Create namespaces
kubectl create namespace tailscale
kubectl create namespace argocd
# 2. Apply secrets (manual, uses 1Password)
op inject -i argocd/manifests/tailscale-operator/secret.yaml.tpl | kubectl apply -f -
PRIV_KEY=$(op read "op://vg6xf6vvfmoh5hqjjhlhbeoaie/csjncynh6htjvnh2l2da65y32q/private key?ssh-format=openssh")$'\n' && \
kubectl create secret generic repo-forge -n argocd \
--from-literal=type=git \
--from-literal=url='ssh://forgejo@indri.tail8d86e.ts.net:2200/eblume/blumeops.git' \
--from-literal=insecure=true \
--from-literal=sshPrivateKey="$PRIV_KEY" && \
kubectl label secret repo-forge -n argocd argocd.argoproj.io/secret-type=repository
# 3. Bootstrap tailscale-operator
kubectl apply -k argocd/manifests/tailscale-operator/
# 4. Bootstrap ArgoCD
kubectl apply -k argocd/manifests/argocd/
# 5. Login and change password
argocd login argocd.tail8d86e.ts.net --username admin --grpc-web
argocd account update-password
# 6. Apply ArgoCD Applications
kubectl apply -f argocd/apps/argocd.yaml
kubectl apply -f argocd/apps/apps.yaml
# 7. Sync workloads
argocd app sync tailscale-operator
argocd app sync cloudnative-pg
```

View file

@ -0,0 +1,52 @@
# Phase 2: Grafana Migration (Pilot)
**Goal**: Migrate Grafana as lowest-risk pilot service
**Status**: Pending
**Prerequisites**: [Phase 1](P1_k8s_infrastructure.md) complete
---
## Steps
### 1. Deploy Grafana via Helm
- Copy datasource config from existing role
- Copy dashboards from `ansible/roles/grafana/files/dashboards/`
- Point to indri Prometheus/Loki (http://indri:9090, http://indri:3100)
---
### 2. Configure Tailscale LoadBalancer
```yaml
service:
type: LoadBalancer
loadBalancerClass: tailscale
```
---
### 3. Verify all dashboards work
---
### 4. Update tailscale_serve
Remove grafana entry from `ansible/roles/tailscale_serve/defaults/main.yml`
---
### 5. Stop brew grafana
```bash
brew services stop grafana
```
---
## Verification
- https://grafana.tail8d86e.ts.net loads
- All dashboards functional

View file

@ -0,0 +1,55 @@
# Phase 3: PostgreSQL Migration
**Goal**: Migrate miniflux database to CloudNativePG
**Status**: Pending
**Prerequisites**: [Phase 2](P2_grafana.md) complete
---
## Steps
### 1. Create databases and users in k8s PostgreSQL
- miniflux database/user
- borgmatic read-only user
---
### 2. Export from brew PostgreSQL
```bash
pg_dump -h localhost -U miniflux miniflux > miniflux_backup.sql
```
---
### 3. Expose k8s PostgreSQL via Tailscale
- Service with `loadBalancerClass: tailscale`
- Tag: `svc:pg-k8s`
---
### 4. Import data
```bash
psql -h pg-k8s.tail8d86e.ts.net -U miniflux miniflux < miniflux_backup.sql
```
---
### 5. Update borgmatic config
- Change hostname to k8s PostgreSQL
---
### 6. Verify data integrity
---
## Rollback
Keep brew PostgreSQL running until Phase 4 is verified.

View file

@ -0,0 +1,48 @@
# Phase 4: Miniflux Migration
**Goal**: Migrate Miniflux to k8s
**Status**: Pending
**Prerequisites**: [Phase 3](P3_postgresql.md) complete
---
## Steps
### 1. Deploy Miniflux
```yaml
image: ghcr.io/miniflux/miniflux:latest
env:
DATABASE_URL: from secret
RUN_MIGRATIONS: "1"
```
---
### 2. Configure Tailscale LoadBalancer
Tag: `svc:feed`
---
### 3. Update Alloy log collection
Add k8s namespace
---
### 4. Verify
- Login works
- Feeds refresh
- API works
---
### 5. Stop brew miniflux
```bash
brew services stop miniflux
```

View file

@ -0,0 +1,37 @@
# Phase 5: devpi Migration
**Goal**: Migrate devpi to k8s
**Status**: Pending
**Prerequisites**: [Phase 4](P4_miniflux.md) complete
---
## Steps
### 1. Build devpi container
- Dockerfile with devpi-server + devpi-web
- Push to local Zot registry
---
### 2. Deploy as StatefulSet
- PVC for data (50Gi)
- Migrate existing data (excluding PyPI cache)
---
### 3. Configure Tailscale LoadBalancer
Tag: `svc:pypi`
---
### 4. Update pip.conf on gilbert
---
### 5. Stop mcquack devpi

View file

@ -0,0 +1,35 @@
# Phase 6: Kiwix Migration
**Goal**: Migrate kiwix-serve to k8s
**Status**: Pending
**Prerequisites**: [Phase 5](P5_devpi.md) complete
---
## Steps
### 1. Create NFS/hostPath PV for ZIM files
- Point to transmission download directory
- ReadOnlyMany access
---
### 2. Deploy Kiwix
```yaml
image: ghcr.io/kiwix/kiwix-serve:3.8.1
args: ["/data/*.zim"]
```
---
### 3. Configure Tailscale LoadBalancer
Tag: `svc:kiwix`
---
### 4. Stop mcquack kiwix-serve

View file

@ -0,0 +1,51 @@
# Phase 7: Forgejo Migration (Highest Risk)
**Goal**: Migrate Forgejo to k8s
**Status**: Pending
**Prerequisites**: [Phase 6](P6_kiwix.md) complete
---
## Pre-Migration Checklist
- [ ] Full borgmatic backup verified
- [ ] Manual backup of `/opt/homebrew/var/forgejo`
- [ ] Document SSH keys and webhooks
---
## Steps
### 1. Deploy Forgejo via Helm
```bash
helm install forgejo forgejo/forgejo \
--namespace forgejo --create-namespace
```
---
### 2. Migrate data
- Stop brew forgejo
- Copy data to PVC
- Start k8s forgejo
---
### 3. Configure Tailscale services
- HTTPS 443 via LoadBalancer
- SSH port 22 (TCP proxy)
---
### 4. Verify all repositories accessible
---
## Rollback
Restore brew forgejo and tailscale serve config

View file

@ -0,0 +1,32 @@
# Phase 8: CI/CD (Woodpecker)
**Goal**: Deploy Woodpecker CI integrated with Forgejo
**Status**: Pending
**Prerequisites**: [Phase 7](P7_forgejo.md) complete
---
## Steps
### 1. Create Forgejo OAuth application
- Callback: https://ci.tail8d86e.ts.net/authorize
- Store in 1Password
---
### 2. Deploy Woodpecker Server + Agent
---
### 3. Configure Tailscale LoadBalancer
Tag: `svc:ci`
---
### 4. Test pipeline
Create `.woodpecker.yaml` in test repo

View file

@ -0,0 +1,52 @@
# Phase 9: Cleanup
**Goal**: Remove deprecated services, harden system
**Status**: Pending
**Prerequisites**: [Phase 8](P8_woodpecker.md) complete
---
## Steps
### 1. Stop/remove unused brew services
- postgresql@18
- grafana
- miniflux
- forgejo
---
### 2. Update ansible playbook
- Remove migrated service roles
- Add k8s deployment references
---
### 3. Configure Velero backups (optional)
- Install with MinIO on sifaka
- Schedule daily cluster backups
---
### 4. Update zk documentation
- New architecture
- Runbooks
- DR procedures
---
## Plan Completion
When all phases are complete and verified:
```bash
# Rename this folder to indicate completion
git mv plans/k8s-migration plans/k8s-migration.complete
git commit -m "Complete k8s migration plan"
```

View file

@ -59,6 +59,21 @@
"dst": ["tag:nas"], "dst": ["tag:nas"],
"ip": ["*"], "ip": ["*"],
}, },
// --- Kubernetes workloads ---
// k8s workloads (e.g., Woodpecker CI) can push/pull from registry
{
"src": ["tag:k8s"],
"dst": ["tag:registry"],
"ip": ["tcp:443"],
},
// k8s workloads (e.g., ArgoCD) can access forge on indri for GitOps
// HTTP on 3001, SSH on 2200
{
"src": ["tag:k8s"],
"dst": ["tag:homelab"],
"ip": ["tcp:3001", "tcp:2200"],
},
], ],
// ============== SSH Access ============== // ============== SSH Access ==============
@ -103,6 +118,8 @@
"tag:feed": ["autogroup:admin", "tag:blumeops"], "tag:feed": ["autogroup:admin", "tag:blumeops"],
"tag:registry": ["autogroup:admin", "tag:blumeops"], "tag:registry": ["autogroup:admin", "tag:blumeops"],
"tag:k8s-api": ["autogroup:admin", "tag:blumeops"], "tag:k8s-api": ["autogroup:admin", "tag:blumeops"],
"tag:k8s-operator": ["autogroup:admin", "tag:blumeops"],
"tag:k8s": ["autogroup:admin", "tag:blumeops", "tag:k8s-operator"],
}, },
// ============== ACL Tests ============== // ============== ACL Tests ==============
@ -123,5 +140,10 @@
"src": "tag:homelab", "src": "tag:homelab",
"accept": ["tag:homelab:22", "tag:nas:445"], "accept": ["tag:homelab:22", "tag:nas:445"],
}, },
// K8s workloads can reach registry and forge (on indri:3001 HTTP, :2200 SSH)
{
"src": "tag:k8s",
"accept": ["tag:registry:443", "tag:homelab:3001", "tag:homelab:2200"],
},
], ],
} }