K8s Migration Phase 1: Infrastructure Setup (#29)
## Summary

- Split k8s migration plan into phases folder for easier navigation
- Added `tag:k8s` to Pulumi ACLs for Kubernetes workloads
- Phase 1 work in progress

## Phase 1 Goals

- Tailscale Kubernetes Operator
- CloudNativePG Operator
- PostgreSQL cluster for future app migrations

## Deployment and Testing

- [ ] Review Phase 1 plan
- [ ] `mise run tailnet-preview` to verify ACL changes
- [ ] `mise run tailnet-up` to apply ACL changes
- [ ] Create Tailscale OAuth client (manual)
- [ ] Deploy operators and PostgreSQL cluster

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Reviewed-on: https://forge.tail8d86e.ts.net/eblume/blumeops/pulls/29
This commit is contained in:
parent
61dced048b
commit
a8f4d00294
42 changed files with 7501 additions and 547 deletions
|
|
@ -27,3 +27,5 @@ rules:
|
|||
ignore:
|
||||
- .venv/
|
||||
- pulumi/.venv/
|
||||
# Third-party k8s manifest with non-standard formatting
|
||||
- argocd/manifests/tailscale-operator/operator.yaml
|
||||
|
|
|
|||
1
Brewfile
1
Brewfile
|
|
@ -1,4 +1,5 @@
|
|||
# CLI tools for blumeops management
|
||||
brew "argocd" # ArgoCD CLI for GitOps management
|
||||
brew "bat" # Syntax-highlighted file concatenation
|
||||
brew "tea" # Gitea/Forgejo CLI for forge.tail8d86e.ts.net
|
||||
brew "podman" # Container CLI (uses VM on macOS, for building/pushing images)
|
||||
|
|
|
|||
27
ansible/roles/minikube/files/zot-mirror.conf
Normal file
27
ansible/roles/minikube/files/zot-mirror.conf
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
# Zot pull-through cache on indri
|
||||
# Uses host.containers.internal which is stable across restarts
|
||||
# Applied by ansible minikube role
|
||||
|
||||
[[registry]]
|
||||
prefix = "docker.io"
|
||||
location = "docker.io"
|
||||
|
||||
[[registry.mirror]]
|
||||
location = "host.containers.internal:5050/docker.io"
|
||||
insecure = true
|
||||
|
||||
[[registry]]
|
||||
prefix = "ghcr.io"
|
||||
location = "ghcr.io"
|
||||
|
||||
[[registry.mirror]]
|
||||
location = "host.containers.internal:5050/ghcr.io"
|
||||
insecure = true
|
||||
|
||||
[[registry]]
|
||||
prefix = "quay.io"
|
||||
location = "quay.io"
|
||||
|
||||
[[registry.mirror]]
|
||||
location = "host.containers.internal:5050/quay.io"
|
||||
insecure = true
|
||||
|
|
@ -7,3 +7,8 @@
|
|||
minikube stop 2>/dev/null || true
|
||||
minikube start
|
||||
changed_when: true
|
||||
|
||||
- name: Restart CRI-O in minikube
|
||||
ansible.builtin.command:
|
||||
cmd: minikube ssh "sudo systemctl restart crio"
|
||||
changed_when: true
|
||||
|
|
|
|||
|
|
@ -56,3 +56,42 @@
|
|||
ansible.builtin.debug:
|
||||
msg: "WARNING: minikube may not have started properly. Run 'minikube start' manually on indri if needed. Status: {{ minikube_final_status.stdout | default('unknown') }}"
|
||||
when: minikube_final_status.rc != 0 or 'Running' not in minikube_final_status.stdout
|
||||
|
||||
# Configure CRI-O to use zot as pull-through cache
|
||||
- name: Copy zot mirror config to temp location
|
||||
ansible.builtin.copy:
|
||||
src: zot-mirror.conf
|
||||
dest: /tmp/zot-mirror.conf
|
||||
mode: "0644"
|
||||
when: minikube_final_status.rc == 0 and 'Running' in minikube_final_status.stdout
|
||||
|
||||
- name: Check if zot mirror config exists in minikube
|
||||
ansible.builtin.command:
|
||||
cmd: minikube ssh "cat /etc/containers/registries.conf.d/zot-mirror.conf 2>/dev/null || echo ''"
|
||||
register: minikube_existing_zot_config
|
||||
changed_when: false
|
||||
when: minikube_final_status.rc == 0 and 'Running' in minikube_final_status.stdout
|
||||
|
||||
- name: Read local zot mirror config
|
||||
ansible.builtin.slurp:
|
||||
src: /tmp/zot-mirror.conf
|
||||
register: minikube_local_zot_config
|
||||
when: minikube_final_status.rc == 0 and 'Running' in minikube_final_status.stdout
|
||||
|
||||
- name: Apply zot mirror config to minikube
|
||||
ansible.builtin.shell:
|
||||
cmd: |
|
||||
set -o pipefail
|
||||
cat /tmp/zot-mirror.conf | minikube ssh "sudo tee /etc/containers/registries.conf.d/zot-mirror.conf > /dev/null"
|
||||
executable: /bin/bash
|
||||
changed_when: true
|
||||
when:
|
||||
- minikube_final_status.rc == 0
|
||||
- "'Running' in minikube_final_status.stdout"
|
||||
# Compare trimmed text: command stdout has trailing newlines stripped, while
# slurp returns the file verbatim, so an untrimmed comparison never matches.
- (minikube_existing_zot_config.stdout | trim) != (minikube_local_zot_config.content | b64decode | trim)
|
||||
notify: Restart CRI-O in minikube
|
||||
|
||||
- name: Clean up temp config file
|
||||
ansible.builtin.file:
|
||||
path: /tmp/zot-mirror.conf
|
||||
state: absent
|
||||
|
|
|
|||
24
argocd/apps/apps.yaml
Normal file
24
argocd/apps/apps.yaml
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
# App-of-apps root Application
|
||||
# Watches argocd/apps/ and creates/manages all Application resources
|
||||
apiVersion: argoproj.io/v1alpha1
|
||||
kind: Application
|
||||
metadata:
|
||||
name: apps
|
||||
namespace: argocd
|
||||
spec:
|
||||
project: default
|
||||
source:
|
||||
repoURL: ssh://forgejo@indri.tail8d86e.ts.net:2200/eblume/blumeops.git
|
||||
targetRevision: feature/k8s-phase1-kickoff
|
||||
path: argocd/apps
|
||||
destination:
|
||||
server: https://kubernetes.default.svc
|
||||
namespace: argocd
|
||||
syncPolicy:
|
||||
automated:
|
||||
prune: true
|
||||
selfHeal: true
|
||||
syncOptions:
|
||||
- CreateNamespace=true
|
||||
# Auto-sync enabled: new/changed Application manifests appear automatically
|
||||
# but child apps still require manual sync (they have manual sync policy)
|
||||
20
argocd/apps/argocd.yaml
Normal file
20
argocd/apps/argocd.yaml
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
# ArgoCD self-management Application
|
||||
# After bootstrap, ArgoCD manages its own deployment
|
||||
apiVersion: argoproj.io/v1alpha1
|
||||
kind: Application
|
||||
metadata:
|
||||
name: argocd
|
||||
namespace: argocd
|
||||
spec:
|
||||
project: default
|
||||
source:
|
||||
repoURL: ssh://forgejo@indri.tail8d86e.ts.net:2200/eblume/blumeops.git
|
||||
targetRevision: feature/k8s-phase1-kickoff
|
||||
path: argocd/manifests/argocd
|
||||
destination:
|
||||
server: https://kubernetes.default.svc
|
||||
namespace: argocd
|
||||
syncPolicy:
|
||||
syncOptions:
|
||||
- CreateNamespace=true
|
||||
# Manual sync only - no automated sync on git push
|
||||
24
argocd/apps/blumeops-pg.yaml
Normal file
24
argocd/apps/blumeops-pg.yaml
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
# PostgreSQL Cluster for blumeops services
|
||||
# Requires: CloudNativePG operator (cloudnative-pg app) and manual secret setup
|
||||
#
|
||||
# Before syncing, create the eblume password secret:
|
||||
# kubectl create namespace databases
|
||||
# op inject -i argocd/manifests/databases/secret-eblume.yaml.tpl | kubectl apply -f -
|
||||
apiVersion: argoproj.io/v1alpha1
|
||||
kind: Application
|
||||
metadata:
|
||||
name: blumeops-pg
|
||||
namespace: argocd
|
||||
spec:
|
||||
project: default
|
||||
source:
|
||||
repoURL: ssh://forgejo@indri.tail8d86e.ts.net:2200/eblume/blumeops.git
|
||||
targetRevision: feature/k8s-phase1-kickoff
|
||||
path: argocd/manifests/databases
|
||||
destination:
|
||||
server: https://kubernetes.default.svc
|
||||
namespace: databases
|
||||
syncPolicy:
|
||||
syncOptions:
|
||||
- CreateNamespace=true
|
||||
# Manual sync only - no automated sync on git push
|
||||
30
argocd/apps/cloudnative-pg.yaml
Normal file
30
argocd/apps/cloudnative-pg.yaml
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
# CloudNativePG Operator - PostgreSQL for Kubernetes
|
||||
# Deploys the operator only; PostgreSQL clusters are created separately
|
||||
apiVersion: argoproj.io/v1alpha1
|
||||
kind: Application
|
||||
metadata:
|
||||
name: cloudnative-pg
|
||||
namespace: argocd
|
||||
spec:
|
||||
project: default
|
||||
sources:
|
||||
# Helm chart from upstream
|
||||
- repoURL: https://cloudnative-pg.github.io/charts
|
||||
chart: cloudnative-pg
|
||||
targetRevision: "0.23.0"
|
||||
helm:
|
||||
releaseName: cloudnative-pg
|
||||
valueFiles:
|
||||
- $values/argocd/manifests/cloudnative-pg/values.yaml
|
||||
# Values from our git repo
|
||||
- repoURL: ssh://forgejo@indri.tail8d86e.ts.net:2200/eblume/blumeops.git
|
||||
targetRevision: feature/k8s-phase1-kickoff
|
||||
ref: values
|
||||
destination:
|
||||
server: https://kubernetes.default.svc
|
||||
namespace: cnpg-system
|
||||
syncPolicy:
|
||||
syncOptions:
|
||||
- CreateNamespace=true
|
||||
- ServerSideApply=true # Required for large CRDs that exceed annotation size limit
|
||||
# Manual sync only - no automated sync on git push
|
||||
26
argocd/apps/tailscale-operator.yaml
Normal file
26
argocd/apps/tailscale-operator.yaml
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
# ArgoCD Application for Tailscale Kubernetes Operator
|
||||
# Note: OAuth secret is managed separately (not in git)
|
||||
apiVersion: argoproj.io/v1alpha1
|
||||
kind: Application
|
||||
metadata:
|
||||
name: tailscale-operator
|
||||
namespace: argocd
|
||||
spec:
|
||||
project: default
|
||||
# Tailscale operator mutates externalName from "placeholder" to actual proxy service
|
||||
ignoreDifferences:
|
||||
- group: ""
|
||||
kind: Service
|
||||
jsonPointers:
|
||||
- /spec/externalName
|
||||
source:
|
||||
repoURL: ssh://forgejo@indri.tail8d86e.ts.net:2200/eblume/blumeops.git
|
||||
targetRevision: feature/k8s-phase1-kickoff
|
||||
path: argocd/manifests/tailscale-operator
|
||||
destination:
|
||||
server: https://kubernetes.default.svc
|
||||
namespace: tailscale
|
||||
syncPolicy:
|
||||
syncOptions:
|
||||
- CreateNamespace=true
|
||||
# Manual sync only - no automated sync on git push
|
||||
114
argocd/manifests/argocd/README.md
Normal file
114
argocd/manifests/argocd/README.md
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
# ArgoCD
|
||||
|
||||
GitOps continuous delivery for Kubernetes, with self-management via ArgoCD.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Tailscale operator deployed (see `argocd/manifests/tailscale-operator/README.md`)
|
||||
- Deploy key added to forge for SSH access to blumeops repo
|
||||
|
||||
## Manual Bootstrap
|
||||
|
||||
Bootstrap is required when setting up a new cluster. After bootstrap, ArgoCD manages itself.
|
||||
|
||||
```bash
|
||||
# 1. Create namespace
|
||||
kubectl create namespace argocd
|
||||
|
||||
# 2. Apply ArgoCD manifests via kustomize
|
||||
kubectl apply -k argocd/manifests/argocd/
|
||||
|
||||
# 3. Wait for ArgoCD to be ready
|
||||
kubectl wait --for=condition=available deployment/argocd-server -n argocd --timeout=300s
|
||||
|
||||
# 4. Get initial admin password
|
||||
kubectl -n argocd get secret argocd-initial-admin-secret -o jsonpath="{.data.password}" | base64 -d && echo
|
||||
|
||||
# 5. Login and change password
|
||||
argocd login argocd.tail8d86e.ts.net --username admin --grpc-web
|
||||
argocd account update-password
|
||||
|
||||
# 6. Apply repo-forge secret for SSH access to forge
|
||||
PRIV_KEY=$(op read "op://vg6xf6vvfmoh5hqjjhlhbeoaie/csjncynh6htjvnh2l2da65y32q/private key?ssh-format=openssh")$'\n' && \
|
||||
kubectl create secret generic repo-forge -n argocd \
|
||||
--from-literal=type=git \
|
||||
--from-literal=url='ssh://forgejo@indri.tail8d86e.ts.net:2200/eblume/blumeops.git' \
|
||||
--from-literal=insecure=true \
|
||||
--from-literal=sshPrivateKey="$PRIV_KEY" && \
|
||||
kubectl label secret repo-forge -n argocd argocd.argoproj.io/secret-type=repository
|
||||
|
||||
# 7. Apply ArgoCD Applications (self-management + app-of-apps)
|
||||
kubectl apply -f argocd/apps/argocd.yaml
|
||||
kubectl apply -f argocd/apps/apps.yaml
|
||||
```
|
||||
|
||||
After step 7, ArgoCD manages itself and all applications defined in `argocd/apps/`.
|
||||
|
||||
## Access
|
||||
|
||||
- URL: https://argocd.tail8d86e.ts.net
|
||||
- Username: `admin`
|
||||
- Password: Stored in 1Password after initial setup
|
||||
|
||||
## ArgoCD CLI Commands
|
||||
|
||||
```bash
|
||||
# Check all applications
|
||||
argocd app list
|
||||
|
||||
# Sync a specific application
|
||||
argocd app sync <app-name>
|
||||
|
||||
# Check application status
|
||||
argocd app get <app-name>
|
||||
|
||||
# Hard refresh (clear git cache)
|
||||
argocd app get <app-name> --hard-refresh
|
||||
```
|
||||
|
||||
## Adding New Applications
|
||||
|
||||
1. Create an Application manifest in `argocd/apps/<app-name>.yaml`
|
||||
2. Commit and push to forge
|
||||
3. ArgoCD (via app-of-apps) automatically picks it up
|
||||
|
||||
Example Application:
|
||||
```yaml
|
||||
apiVersion: argoproj.io/v1alpha1
|
||||
kind: Application
|
||||
metadata:
|
||||
name: my-app
|
||||
namespace: argocd
|
||||
spec:
|
||||
project: default
|
||||
source:
|
||||
repoURL: ssh://forgejo@indri.tail8d86e.ts.net:2200/eblume/blumeops.git
|
||||
targetRevision: main
|
||||
path: argocd/manifests/my-app
|
||||
destination:
|
||||
server: https://kubernetes.default.svc
|
||||
namespace: my-app
|
||||
syncPolicy:
|
||||
automated:
|
||||
prune: true
|
||||
selfHeal: true
|
||||
syncOptions:
|
||||
- CreateNamespace=true
|
||||
```
|
||||
|
||||
## Files
|
||||
|
||||
| File | Description |
|
||||
|------|-------------|
|
||||
| `kustomization.yaml` | References upstream install.yaml + local customizations |
|
||||
| `service-tailscale.yaml` | Tailscale Ingress for external access with Let's Encrypt TLS |
|
||||
| `argocd-cmd-params-cm.yaml` | Patch to disable HTTPS redirect (TLS terminates at Ingress) |
|
||||
| `repo-forge-secret.yaml.tpl` | Template documenting the forge SSH secret (manual) |
|
||||
| `README.md` | This file |
|
||||
|
||||
## Notes
|
||||
|
||||
- **TODO:** Secrets (`repo-forge`) are not managed by ArgoCD and must be applied manually.
|
||||
Future improvement: integrate with a secrets operator (e.g., External Secrets).
|
||||
- ArgoCD uses Tailscale Ingress with Let's Encrypt for TLS termination.
|
||||
- The `--grpc-web` flag is required for CLI access through the Tailscale ingress.
|
||||
8
argocd/manifests/argocd/argocd-cmd-params-cm.yaml
Normal file
8
argocd/manifests/argocd/argocd-cmd-params-cm.yaml
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
# ArgoCD server parameters
|
||||
# Disables HTTPS redirect since TLS is terminated at Tailscale Ingress
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: argocd-cmd-params-cm
|
||||
data:
|
||||
server.insecure: "true"
|
||||
11
argocd/manifests/argocd/kustomization.yaml
Normal file
11
argocd/manifests/argocd/kustomization.yaml
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
|
||||
namespace: argocd
|
||||
|
||||
resources:
|
||||
- https://raw.githubusercontent.com/argoproj/argo-cd/stable/manifests/install.yaml
|
||||
- service-tailscale.yaml
|
||||
|
||||
# `patches` supersedes the deprecated `patchesStrategicMerge` field (kustomize v5+).
# The patch target is resolved from the kind/name declared inside the patch file.
patches:
- path: argocd-cmd-params-cm.yaml
|
||||
27
argocd/manifests/argocd/repo-forge-secret.yaml.tpl
Normal file
27
argocd/manifests/argocd/repo-forge-secret.yaml.tpl
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
# ArgoCD repository secret for forge SSH access
|
||||
#
|
||||
# IMPORTANT: Use ?ssh-format=openssh to get OpenSSH format (required by ArgoCD)
|
||||
#
|
||||
# Create the secret with:
|
||||
#
|
||||
# PRIV_KEY=$(op read "op://vg6xf6vvfmoh5hqjjhlhbeoaie/csjncynh6htjvnh2l2da65y32q/private key?ssh-format=openssh")$'\n' && \
|
||||
# kubectl create secret generic repo-forge -n argocd \
|
||||
# --from-literal=type=git \
|
||||
# --from-literal=url='ssh://forgejo@indri.tail8d86e.ts.net:2200/eblume/blumeops.git' \
|
||||
# --from-literal=insecure=true \
|
||||
# --from-literal=sshPrivateKey="$PRIV_KEY" && \
|
||||
# kubectl label secret repo-forge -n argocd argocd.argoproj.io/secret-type=repository
|
||||
#
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: repo-forge
|
||||
namespace: argocd
|
||||
labels:
|
||||
argocd.argoproj.io/secret-type: repository
|
||||
stringData:
|
||||
type: git
|
||||
url: ssh://forgejo@indri.tail8d86e.ts.net:2200/eblume/blumeops.git
|
||||
insecure: "true"
|
||||
sshPrivateKey: |
|
||||
# Key from 1Password: op://vg6xf6vvfmoh5hqjjhlhbeoaie/csjncynh6htjvnh2l2da65y32q/private key
|
||||
23
argocd/manifests/argocd/service-tailscale.yaml
Normal file
23
argocd/manifests/argocd/service-tailscale.yaml
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
# Tailscale Ingress for ArgoCD
|
||||
# Exposes ArgoCD at https://argocd.tail8d86e.ts.net with Let's Encrypt TLS
|
||||
#
|
||||
# Using Ingress instead of LoadBalancer to get automatic TLS certificates.
|
||||
# See: https://tailscale.com/kb/1439/kubernetes-operator-cluster-ingress
|
||||
---
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: argocd-server-tailscale
|
||||
namespace: argocd
|
||||
annotations:
|
||||
tailscale.com/proxy-class: "crio-compat"
|
||||
spec:
|
||||
ingressClassName: tailscale
|
||||
defaultBackend:
|
||||
service:
|
||||
name: argocd-server
|
||||
port:
|
||||
number: 80
|
||||
tls:
|
||||
- hosts:
|
||||
- argocd
|
||||
52
argocd/manifests/cloudnative-pg/README.md
Normal file
52
argocd/manifests/cloudnative-pg/README.md
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
# CloudNativePG Operator
|
||||
|
||||
Kubernetes operator for managing PostgreSQL clusters with high availability.
|
||||
|
||||
## Source
|
||||
|
||||
- Helm chart: `cloudnative-pg` from https://cloudnative-pg.github.io/charts
|
||||
- Documentation: https://cloudnative-pg.io/documentation/
|
||||
|
||||
## Deployment
|
||||
|
||||
Managed via ArgoCD Application using Helm source (not kustomize).
|
||||
The Application points directly to the upstream Helm repository.
|
||||
|
||||
## ArgoCD CLI Commands
|
||||
|
||||
```bash
|
||||
# Check application status
|
||||
argocd app get cloudnative-pg
|
||||
|
||||
# Trigger a sync
|
||||
argocd app sync cloudnative-pg
|
||||
|
||||
# View deployment history
|
||||
argocd app history cloudnative-pg
|
||||
```
|
||||
|
||||
## Verification
|
||||
|
||||
```bash
|
||||
# Check operator pod is running
|
||||
kubectl get pods -n cnpg-system
|
||||
|
||||
# Check operator logs
|
||||
kubectl logs -n cnpg-system -l app.kubernetes.io/name=cloudnative-pg
|
||||
|
||||
# Check CRDs are installed
|
||||
kubectl get crd | grep cnpg
|
||||
```
|
||||
|
||||
## Files
|
||||
|
||||
| File | Description |
|
||||
|------|-------------|
|
||||
| `values.yaml` | Helm values for customization |
|
||||
| `README.md` | This file |
|
||||
|
||||
## Notes
|
||||
|
||||
- The operator is deployed to `cnpg-system` namespace
|
||||
- PostgreSQL clusters are created separately using the `Cluster` CRD (see `argocd/manifests/databases/`)
|
||||
- No secrets required for the operator itself
|
||||
4
argocd/manifests/cloudnative-pg/values.yaml
Normal file
4
argocd/manifests/cloudnative-pg/values.yaml
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
# CloudNativePG Helm values
|
||||
# See: https://github.com/cloudnative-pg/charts/tree/main/charts/cloudnative-pg
|
||||
|
||||
# Using defaults for now - customize as needed
|
||||
97
argocd/manifests/databases/README.md
Normal file
97
argocd/manifests/databases/README.md
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
# Database Manifests
|
||||
|
||||
PostgreSQL clusters managed by CloudNativePG operator.
|
||||
|
||||
## blumeops-pg
|
||||
|
||||
Single-instance PostgreSQL cluster for blumeops services.
|
||||
|
||||
### Configuration
|
||||
|
||||
- **Instances**: 1 (single-node for minikube)
|
||||
- **Storage**: 10Gi on `standard` storage class
|
||||
- **Initial database**: `miniflux` owned by `miniflux` user
|
||||
|
||||
### Users/Roles
|
||||
|
||||
| User | Role | Purpose | Password Source |
|
||||
|----------|-------------|----------------------------------|------------------------------------|
|
||||
| postgres | superuser | CNPG internal (avoid using) | `blumeops-pg-superuser` secret |
|
||||
| miniflux | app owner | Owns miniflux database | `blumeops-pg-app` secret |
|
||||
| eblume | superuser | Admin access (matches brew pg) | `blumeops-pg-eblume` secret (manual) |
|
||||
|
||||
### Manual Secret Setup
|
||||
|
||||
Before deploying, create the eblume password secret:
|
||||
|
||||
```bash
|
||||
# Create namespace first
|
||||
kubectl create namespace databases
|
||||
|
||||
# Apply eblume password from 1Password
|
||||
op inject -i argocd/manifests/databases/secret-eblume.yaml.tpl | kubectl apply -f -
|
||||
```
|
||||
|
||||
The `miniflux` user password is auto-generated by CloudNativePG and stored in `blumeops-pg-app`.
|
||||
|
||||
### Connection Information
|
||||
|
||||
After the cluster is healthy:
|
||||
|
||||
```bash
|
||||
# Connect via Tailscale (temporary hostname during migration)
|
||||
psql -h k8s-pg.tail8d86e.ts.net -U eblume -W -d miniflux
|
||||
|
||||
# Or with password from 1Password
|
||||
PGPASSWORD=$(op --vault blumeops item get guxu3j7ajhjyey6xxl2ovsl2ui --fields password --reveal) \
|
||||
psql -h k8s-pg.tail8d86e.ts.net -U eblume -d miniflux
|
||||
|
||||
# Get miniflux app credentials (for applications)
|
||||
kubectl -n databases get secret blumeops-pg-app -o jsonpath='{.data.uri}' | base64 -d
|
||||
|
||||
# Get postgres superuser credentials (emergency only)
|
||||
kubectl -n databases get secret blumeops-pg-superuser -o jsonpath='{.data.password}' | base64 -d
|
||||
```
|
||||
|
||||
### Connecting via kubectl port-forward
|
||||
|
||||
Alternative if Tailscale service is unavailable:
|
||||
|
||||
```bash
|
||||
# Terminal 1: Port-forward to the primary
|
||||
kubectl -n databases port-forward svc/blumeops-pg-rw 5432:5432
|
||||
|
||||
# Terminal 2: Connect as eblume
|
||||
PGPASSWORD=$(op --vault blumeops item get guxu3j7ajhjyey6xxl2ovsl2ui --fields password --reveal) \
|
||||
psql -h localhost -U eblume -d miniflux
|
||||
```
|
||||
|
||||
### Status
|
||||
|
||||
```bash
|
||||
# Check cluster health
|
||||
kubectl -n databases get cluster blumeops-pg
|
||||
|
||||
# Check pods
|
||||
kubectl -n databases get pods -l cnpg.io/cluster=blumeops-pg
|
||||
|
||||
# Check managed roles status
|
||||
kubectl -n databases get cluster blumeops-pg -o jsonpath='{.status.managedRolesStatus}' | jq
|
||||
|
||||
# PostgreSQL instance pod logs (the operator itself runs in the cnpg-system namespace)
|
||||
kubectl -n databases logs -l cnpg.io/cluster=blumeops-pg
|
||||
```
|
||||
|
||||
## Tailscale Exposure
|
||||
|
||||
### Current: Temporary Service
|
||||
|
||||
`k8s-pg.tail8d86e.ts.net` - LoadBalancer service for testing during migration.
|
||||
|
||||
### Phase 4: Production Service
|
||||
|
||||
After miniflux migrates to k8s, the `pg.tail8d86e.ts.net` Tailscale service will switch
|
||||
from brew PostgreSQL (indri) to this k8s cluster. At that point:
|
||||
1. Delete `service-tailscale.yaml` (the `k8s-pg` service)
|
||||
2. Update/create a service with `tailscale.com/hostname: "pg"`
|
||||
3. Verify the orphaned `k8s-pg` device is removed from tailnet
|
||||
52
argocd/manifests/databases/blumeops-pg.yaml
Normal file
52
argocd/manifests/databases/blumeops-pg.yaml
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
# PostgreSQL Cluster for blumeops services
|
||||
# Managed by CloudNativePG operator
|
||||
apiVersion: postgresql.cnpg.io/v1
|
||||
kind: Cluster
|
||||
metadata:
|
||||
name: blumeops-pg
|
||||
namespace: databases
|
||||
spec:
|
||||
instances: 1
|
||||
|
||||
storage:
|
||||
size: 10Gi
|
||||
storageClass: standard
|
||||
|
||||
# Bootstrap creates initial database and owner
|
||||
bootstrap:
|
||||
initdb:
|
||||
database: miniflux
|
||||
owner: miniflux
|
||||
|
||||
# Managed roles - additional users beyond the bootstrap owner
|
||||
managed:
|
||||
roles:
|
||||
# eblume superuser for admin access (matches current brew pg setup)
|
||||
- name: eblume
|
||||
login: true
|
||||
superuser: true
|
||||
createdb: true
|
||||
createrole: true
|
||||
passwordSecret:
|
||||
name: blumeops-pg-eblume
|
||||
|
||||
# Resource limits for minikube environment
|
||||
resources:
|
||||
requests:
|
||||
memory: "256Mi"
|
||||
cpu: "100m"
|
||||
limits:
|
||||
memory: "1Gi"
|
||||
cpu: "500m"
|
||||
|
||||
# PostgreSQL configuration
|
||||
postgresql:
|
||||
parameters:
|
||||
max_connections: "50"
|
||||
shared_buffers: "128MB"
|
||||
password_encryption: "scram-sha-256"
|
||||
pg_hba:
|
||||
# Allow all users to connect from any IP with password auth
|
||||
# Network security is handled by Tailscale
|
||||
- host all all 0.0.0.0/0 scram-sha-256
|
||||
- host all all ::/0 scram-sha-256
|
||||
8
argocd/manifests/databases/kustomization.yaml
Normal file
8
argocd/manifests/databases/kustomization.yaml
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
|
||||
namespace: databases
|
||||
|
||||
resources:
|
||||
- blumeops-pg.yaml
|
||||
- service-tailscale.yaml
|
||||
13
argocd/manifests/databases/secret-eblume.yaml.tpl
Normal file
13
argocd/manifests/databases/secret-eblume.yaml.tpl
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
# Template for eblume superuser password
|
||||
# Apply with: op inject -i secret-eblume.yaml.tpl | kubectl apply -f -
|
||||
#
|
||||
# Uses the same 1Password item as the brew PostgreSQL setup on indri
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: blumeops-pg-eblume
|
||||
namespace: databases
|
||||
type: kubernetes.io/basic-auth
|
||||
stringData:
|
||||
username: eblume
|
||||
# Quoted so the value injected by `op inject` is always parsed as a YAML string
# (matches the quoting used in tailscale-operator/secret.yaml.tpl).
password: "{{ op://vg6xf6vvfmoh5hqjjhlhbeoaie/guxu3j7ajhjyey6xxl2ovsl2ui/password }}"
|
||||
22
argocd/manifests/databases/service-tailscale.yaml
Normal file
22
argocd/manifests/databases/service-tailscale.yaml
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
# Tailscale LoadBalancer for PostgreSQL access
|
||||
# Temporary service for testing during migration (k8s-pg.tail8d86e.ts.net)
|
||||
# Will be replaced by pg.tail8d86e.ts.net in Phase 4
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: blumeops-pg-tailscale
|
||||
namespace: databases
|
||||
annotations:
|
||||
tailscale.com/hostname: "k8s-pg"
|
||||
tailscale.com/proxy-class: "crio-compat"
|
||||
spec:
|
||||
type: LoadBalancer
|
||||
loadBalancerClass: tailscale
|
||||
selector:
|
||||
cnpg.io/cluster: blumeops-pg
|
||||
role: primary
|
||||
ports:
|
||||
- name: postgresql
|
||||
port: 5432
|
||||
targetPort: 5432
|
||||
protocol: TCP
|
||||
90
argocd/manifests/tailscale-operator/README.md
Normal file
90
argocd/manifests/tailscale-operator/README.md
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
# Tailscale Kubernetes Operator
|
||||
|
||||
Manifests for the Tailscale Kubernetes Operator, managed via ArgoCD.
|
||||
|
||||
## Source
|
||||
|
||||
- `operator.yaml` - Static manifest from https://github.com/tailscale/tailscale/tree/main/cmd/k8s-operator/deploy/manifests
|
||||
- Secret block removed from `operator.yaml` - managed separately via `secret.yaml.tpl`
|
||||
- Image reference changed to fully-qualified `docker.io/tailscale/k8s-operator:stable` for CRI-O compatibility
|
||||
|
||||
## Prerequisites
|
||||
|
||||
1. OAuth client in Tailscale admin console with:
|
||||
- Devices: Core (Read & Write) - tag: `tag:k8s-operator`
|
||||
- Auth Keys: Read & Write
|
||||
- Services: Write
|
||||
2. ACL with `tag:k8s-operator` owning `tag:k8s` (so operator can tag resources it creates)
|
||||
|
||||
## Manual Bootstrap (Before ArgoCD)
|
||||
|
||||
Tailscale operator must be deployed before ArgoCD since ArgoCD uses Tailscale for ingress.
|
||||
|
||||
```bash
|
||||
# 1. Create namespace
|
||||
kubectl create namespace tailscale
|
||||
|
||||
# 2. Apply OAuth secret (uses 1Password)
|
||||
op inject -i argocd/manifests/tailscale-operator/secret.yaml.tpl | kubectl apply -f -
|
||||
|
||||
# 3. Apply manifests via kustomize
|
||||
kubectl apply -k argocd/manifests/tailscale-operator/
|
||||
```
|
||||
|
||||
## Ongoing Management (After ArgoCD)
|
||||
|
||||
Once ArgoCD is running, the operator is managed by the `tailscale-operator` ArgoCD Application.
|
||||
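ArgoCD pulls manifests from forge, but the Application has no automated sync policy, so changes are applied only when a sync is triggered manually (see the CLI commands below).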
|
||||
|
||||
## ArgoCD CLI Commands
|
||||
|
||||
```bash
|
||||
# Check application status
|
||||
argocd app get tailscale-operator
|
||||
|
||||
# Trigger a sync (pull latest from forge and apply)
|
||||
argocd app sync tailscale-operator
|
||||
|
||||
# Preview what would change without applying
|
||||
argocd app diff tailscale-operator
|
||||
|
||||
# View deployment history
|
||||
argocd app history tailscale-operator
|
||||
|
||||
# Hard refresh (clear cache and re-fetch from git)
|
||||
argocd app get tailscale-operator --hard-refresh
|
||||
```
|
||||
|
||||
## Verification
|
||||
|
||||
```bash
|
||||
# Check operator pod is running
|
||||
kubectl get pods -n tailscale
|
||||
|
||||
# Check operator logs
|
||||
kubectl logs -n tailscale -l app.kubernetes.io/name=operator
|
||||
```
|
||||
|
||||
## Files
|
||||
|
||||
| File | Description |
|
||||
|------|-------------|
|
||||
| `kustomization.yaml` | Kustomize configuration for all manifests |
|
||||
| `operator.yaml` | Operator deployment, CRDs, RBAC (secret removed) |
|
||||
| `proxyclass.yaml` | ProxyClass with fully-qualified images for CRI-O |
|
||||
| `dnsconfig.yaml` | DNSConfig for cluster-to-tailnet name resolution |
|
||||
| `egress-forge.yaml` | Egress proxy for accessing forge on indri |
|
||||
| `secret.yaml.tpl` | 1Password template for OAuth credentials (manual) |
|
||||
| `README.md` | This file |
|
||||
|
||||
## Notes
|
||||
|
||||
- **TODO:** The OAuth secret (`operator-oauth`) is not managed by ArgoCD and must be applied
|
||||
manually. Future improvement: integrate with a secrets operator (e.g., External Secrets).
|
||||
- Services and Ingresses exposed through the Tailscale operator (LoadBalancer, Ingress, or egress) must reference the ProxyClass:
|
||||
```yaml
|
||||
annotations:
|
||||
tailscale.com/proxy-class: "crio-compat"
|
||||
```
|
||||
- The egress proxy for forge targets `indri.tail8d86e.ts.net` directly (not `forge.tail8d86e.ts.net`)
|
||||
because Tailscale Serve hostnames are virtual and only work via the Tailscale client.
|
||||
16
argocd/manifests/tailscale-operator/dnsconfig.yaml
Normal file
16
argocd/manifests/tailscale-operator/dnsconfig.yaml
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
# DNSConfig for resolving MagicDNS names from within the cluster
|
||||
# Deploys a nameserver that resolves ts.net names to egress proxy IPs
|
||||
#
|
||||
# Requires CoreDNS/kube-dns configuration to forward ts.net queries.
|
||||
# See: https://tailscale.com/kb/1438/kubernetes-operator-cluster-egress
|
||||
---
|
||||
apiVersion: tailscale.com/v1alpha1
|
||||
kind: DNSConfig
|
||||
metadata:
|
||||
name: ts-dns
|
||||
namespace: tailscale
|
||||
spec:
|
||||
nameserver:
|
||||
image:
|
||||
repo: docker.io/tailscale/k8s-nameserver
|
||||
tag: stable
|
||||
20
argocd/manifests/tailscale-operator/egress-forge.yaml
Normal file
20
argocd/manifests/tailscale-operator/egress-forge.yaml
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
# Egress proxy to expose Forgejo (forge) to the cluster
|
||||
# Forge runs on indri:3001, exposed via Tailscale Serve as forge.tail8d86e.ts.net
|
||||
# We target indri directly since egress can't reach Tailscale Serve hostnames
|
||||
#
|
||||
# See: https://tailscale.com/kb/1438/kubernetes-operator-cluster-egress
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: forge
|
||||
namespace: tailscale
|
||||
annotations:
|
||||
tailscale.com/tailnet-fqdn: indri.tail8d86e.ts.net
|
||||
tailscale.com/proxy-class: "crio-compat"
|
||||
spec:
|
||||
type: ExternalName
|
||||
externalName: placeholder
|
||||
ports:
|
||||
- port: 3001
|
||||
targetPort: 3001
|
||||
13
argocd/manifests/tailscale-operator/kustomization.yaml
Normal file
13
argocd/manifests/tailscale-operator/kustomization.yaml
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
|
||||
namespace: tailscale
|
||||
|
||||
resources:
|
||||
- operator.yaml
|
||||
- proxyclass.yaml
|
||||
- dnsconfig.yaml
|
||||
- egress-forge.yaml
|
||||
|
||||
# Note: OAuth secret (operator-oauth) is NOT included here.
|
||||
# It must be manually applied before deploying - see README.md
|
||||
5386
argocd/manifests/tailscale-operator/operator.yaml
Normal file
5386
argocd/manifests/tailscale-operator/operator.yaml
Normal file
File diff suppressed because it is too large
Load diff
29
argocd/manifests/tailscale-operator/proxyclass.yaml
Normal file
29
argocd/manifests/tailscale-operator/proxyclass.yaml
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
# ProxyClass: crio-compat
|
||||
#
|
||||
# Why this exists:
|
||||
# CRI-O (the container runtime used by minikube) cannot resolve short image
|
||||
# names like "tailscale/tailscale:stable". It requires fully-qualified names
|
||||
# with an explicit registry prefix (e.g., "docker.io/tailscale/tailscale:stable").
|
||||
#
|
||||
# The Tailscale operator creates proxy pods (StatefulSets) for each LoadBalancer
|
||||
# Service or Ingress. By default, these pods use short image names which fail
|
||||
# on CRI-O with "ImageInspectError".
|
||||
#
|
||||
# Usage:
|
||||
# Add this annotation to any Tailscale Service or Ingress:
|
||||
# tailscale.com/proxy-class: "crio-compat"
|
||||
#
|
||||
# This tells the operator to use the fully-qualified image names defined below
|
||||
# when creating the proxy pod for that resource.
|
||||
---
|
||||
apiVersion: tailscale.com/v1alpha1
|
||||
kind: ProxyClass
|
||||
metadata:
|
||||
name: crio-compat
|
||||
spec:
|
||||
statefulSet:
|
||||
pod:
|
||||
tailscaleContainer:
|
||||
image: docker.io/tailscale/tailscale:stable
|
||||
tailscaleInitContainer:
|
||||
image: docker.io/tailscale/tailscale:stable
|
||||
14
argocd/manifests/tailscale-operator/secret.yaml.tpl
Normal file
14
argocd/manifests/tailscale-operator/secret.yaml.tpl
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
# Tailscale Operator OAuth Secret
|
||||
# This template is processed by `op inject` to resolve 1Password references.
|
||||
#
|
||||
# Usage:
|
||||
# op inject -i secret.yaml.tpl | kubectl apply -f -
|
||||
#
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: operator-oauth
|
||||
namespace: tailscale
|
||||
stringData:
|
||||
client_id: "{{ op://vg6xf6vvfmoh5hqjjhlhbeoaie/2it22lavwgbxdskoaxanej354q/client-id }}"
|
||||
client_secret: "{{ op://vg6xf6vvfmoh5hqjjhlhbeoaie/2it22lavwgbxdskoaxanej354q/client-secret }}"
|
||||
|
|
@ -83,6 +83,14 @@ check_service "minikube" "ssh indri 'minikube status --format={{.Host}} | grep -
|
|||
check_service "k8s-apiserver (indri)" "ssh indri 'kubectl get --raw /healthz'"
|
||||
check_service "k8s-apiserver (remote)" "kubectl --kubeconfig=$HOME/.kube/minikube-indri/config.yml --context=minikube-indri get --raw /healthz"
|
||||
|
||||
echo ""
|
||||
echo "Kubernetes workloads (via Tailscale):"
|
||||
check_http "ArgoCD" "https://argocd.tail8d86e.ts.net/healthz"
|
||||
# k8s PostgreSQL - check TCP connection (no auth needed for pg_isready)
|
||||
check_service "k8s-pg" "pg_isready -h k8s-pg.tail8d86e.ts.net -p 5432"
|
||||
# ArgoCD apps sync status
|
||||
check_service "ArgoCD apps synced" "kubectl --context=minikube-indri get applications -n argocd -o jsonpath='{.items[*].status.sync.status}' | grep -v OutOfSync"
|
||||
|
||||
echo ""
|
||||
if [ $FAILED -eq 0 ]; then
|
||||
echo -e "${GREEN}All services healthy!${NC}"
|
||||
|
|
|
|||
149
plans/k8s-migration/00_overview.md
Normal file
149
plans/k8s-migration/00_overview.md
Normal file
|
|
@ -0,0 +1,149 @@
|
|||
# Blumeops Minikube Migration Plan
|
||||
|
||||
This plan details a phased migration of blumeops services from direct hosting on indri (Mac Mini M1) to a minikube cluster, while maintaining critical infrastructure services outside of Kubernetes.
|
||||
|
||||
## Phases
|
||||
|
||||
| Phase | Name | Status | Description |
|
||||
|-------|------|--------|-------------|
|
||||
| 0 | [Foundation](P0_foundation.complete.md) | Complete | Container registry + minikube cluster |
|
||||
| 1 | [K8s Infrastructure](P1_k8s_infrastructure.md) | In Progress | Tailscale operator, ArgoCD, CloudNativePG, PostgreSQL cluster |
|
||||
| 2 | [Grafana](P2_grafana.md) | Pending | Migrate Grafana (pilot) via ArgoCD |
|
||||
| 3 | [PostgreSQL](P3_postgresql.md) | Pending | Data migration to k8s PostgreSQL |
|
||||
| 4 | [Miniflux](P4_miniflux.md) | Pending | Migrate Miniflux via ArgoCD |
|
||||
| 5 | [devpi](P5_devpi.md) | Pending | Migrate devpi via ArgoCD |
|
||||
| 6 | [Kiwix](P6_kiwix.md) | Pending | Migrate Kiwix via ArgoCD |
|
||||
| 7 | [Forgejo](P7_forgejo.md) | Pending | Migrate Forgejo (highest risk) via ArgoCD |
|
||||
| 8 | [Woodpecker](P8_woodpecker.md) | Pending | Deploy CI/CD via ArgoCD |
|
||||
| 9 | [Cleanup](P9_cleanup.md) | Pending | Remove deprecated services |
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
### Services Staying on Indri (Outside K8s)
|
||||
| Service | Reason |
|
||||
|---------|--------|
|
||||
| **Zot Registry** (NEW) | Avoid circular dependency - k8s needs images to start |
|
||||
| **Prometheus** | Observability backbone must survive k8s failures |
|
||||
| **Loki** | Log aggregation backbone |
|
||||
| **Borgmatic** | Backup system |
|
||||
| **Grafana-alloy** | Metrics/logs collector on host |
|
||||
| **Plex** | Until Jellyfin replacement |
|
||||
| **Transmission** | Downloads for kiwix ZIM files |
|
||||
|
||||
### Services Moving to K8s
|
||||
| Service | Complexity | Dependencies |
|
||||
|---------|------------|--------------|
|
||||
| Grafana | LOW | Phase 1 |
|
||||
| Kiwix | LOW | Phase 1 |
|
||||
| Miniflux | MEDIUM | PostgreSQL |
|
||||
| devpi | MEDIUM | Registry |
|
||||
| PostgreSQL | HIGH | Phase 1 |
|
||||
| Forgejo | HIGH | PostgreSQL |
|
||||
| Woodpecker CI | MEDIUM | Forgejo |
|
||||
|
||||
## Technical Decisions
|
||||
|
||||
### Container Registry: Zot
|
||||
- OCI-native, lightweight
|
||||
- Native support for proxying multiple registries (Docker Hub, GHCR, Quay)
|
||||
- Built from source at `~/code/3rd/zot` (not in homebrew)
|
||||
- Binary: `~/code/3rd/zot/bin/zot-darwin-arm64`
|
||||
- Config: `~/.config/zot/config.json`
|
||||
- Data: `~/zot/`
|
||||
|
||||
### Minikube Driver: Podman
|
||||
- Rootless containers for better security
|
||||
- Lighter than full VM (QEMU)
|
||||
- Uses existing container ecosystem
|
||||
- `minikube start --driver=podman --container-runtime=cri-o`
|
||||
|
||||
### PostgreSQL: CloudNativePG Operator
|
||||
- Production-grade operator
|
||||
- Built-in backup/restore
|
||||
- Prometheus metrics
|
||||
- PITR support
|
||||
|
||||
### K8s Service Exposure: Tailscale Operator
|
||||
- `loadBalancerClass: tailscale` on Services
|
||||
- Automatic TLS and MagicDNS names
|
||||
- ACL-controlled access
|
||||
|
||||
### LaunchAgent Requirements (Critical)
|
||||
LaunchAgents do NOT get homebrew on PATH. All commands must use **absolute paths**:
|
||||
- `/Users/erichblume/code/3rd/zot/bin/zot-darwin-arm64` for zot (built from source)
|
||||
- `/opt/homebrew/opt/mise/bin/mise x --` for mise-managed tools
|
||||
- `/opt/homebrew/opt/postgresql@18/bin/pg_dump` for postgres tools
|
||||
|
||||
This applies to all mcquack LaunchAgents (zot, devpi, kiwix, borgmatic, metrics collectors).
|
||||
`brew services` handles this automatically, but brew-managed services aren't tracked in ansible.
|
||||
|
||||
### Backup Strategy
|
||||
|
||||
Borgmatic remains on indri (outside k8s), writing to sifaka NAS via SMB at `/Volumes/backups`. This ensures backups continue even if k8s is down.
|
||||
|
||||
| Service | Backup Approach |
|
||||
|---------|-----------------|
|
||||
| **Zot Registry** | No backup needed - pull-through cache is re-fetchable, private images rebuilt from source control |
|
||||
| **Minikube** | No backup of cluster state - declarative manifests in git, can recreate |
|
||||
| **PostgreSQL (k8s)** | CloudNativePG scheduled backups to sifaka (Phase 1) |
|
||||
| **Grafana (k8s)** | Dashboards in ansible source control, no runtime backup needed |
|
||||
| **Miniflux (k8s)** | Database backed up via CloudNativePG |
|
||||
| **Forgejo (k8s)** | Git repos are distributed, config in ansible; data dir backed up via borgmatic before migration |
|
||||
| **devpi (k8s)** | Private packages backed up, PyPI cache re-fetchable |
|
||||
| **Kiwix (k8s)** | ZIM files re-downloadable via torrent, no backup needed |
|
||||
|
||||
**Borgmatic config changes:** None required for Phase 0. Future phases may add k8s PV paths if needed.
|
||||
|
||||
---
|
||||
|
||||
## Critical Files
|
||||
|
||||
| File | Purpose |
|
||||
|------|---------|
|
||||
| `ansible/playbooks/indri.yml` | Main playbook - add k8s roles, remove migrated services |
|
||||
| `ansible/roles/tailscale_serve/defaults/main.yml` | Transition services to Tailscale operator |
|
||||
| `pulumi/policy.hujson` | Add tags: k8s, registry, ci |
|
||||
| `ansible/roles/borgmatic/defaults/main.yml` | Update PostgreSQL endpoint |
|
||||
| `mise-tasks/indri-services-check` | Add k8s health checks |
|
||||
|
||||
## New Directory Structure
|
||||
|
||||
```
|
||||
ansible/
|
||||
k8s/
|
||||
operators/
|
||||
tailscale-operator.yaml
|
||||
cloudnative-pg.yaml
|
||||
databases/
|
||||
blumeops-pg.yaml
|
||||
apps/
|
||||
grafana/
|
||||
miniflux/
|
||||
forgejo/
|
||||
devpi/
|
||||
kiwix/
|
||||
woodpecker/
|
||||
roles/
|
||||
zot/ # NEW
|
||||
podman/ # NEW
|
||||
minikube/ # NEW
|
||||
```
|
||||
|
||||
## Risk Mitigation
|
||||
|
||||
- **Circular dependency prevention**: Zot registry runs outside k8s
|
||||
- **Observability**: Prometheus/Loki stay on indri
|
||||
- **Data loss prevention**: borgmatic + manual backups before each phase
|
||||
- **Recovery**: Can manually push images, restore from backups
|
||||
|
||||
## Container Images (All ARM64)
|
||||
|
||||
| Service | Image |
|
||||
|---------|-------|
|
||||
| Miniflux | `ghcr.io/miniflux/miniflux:latest` |
|
||||
| Forgejo | `codeberg.org/forgejo/forgejo:10` |
|
||||
| Grafana | `grafana/grafana:latest` |
|
||||
| Kiwix | `ghcr.io/kiwix/kiwix-serve:3.8.1` |
|
||||
| Woodpecker | `woodpeckerci/woodpecker-server` |
|
||||
|
||||
Note: Zot runs as a native binary on indri (built from source at `~/code/3rd/zot`), not as a container.
|
||||
File diff suppressed because it is too large
Load diff
657
plans/k8s-migration/P1_k8s_infrastructure.md
Normal file
657
plans/k8s-migration/P1_k8s_infrastructure.md
Normal file
|
|
@ -0,0 +1,657 @@
|
|||
# Phase 1: Kubernetes Infrastructure
|
||||
|
||||
**Goal**: Tailscale operator, ArgoCD, CloudNativePG operator, PostgreSQL cluster
|
||||
|
||||
**Status**: In Progress
|
||||
|
||||
**Prerequisites**: [Phase 0](P0_foundation.complete.md) complete
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Phase 1 establishes the k8s control plane infrastructure:
|
||||
1. **Tailscale operator** - Exposes services on the tailnet
|
||||
2. **ArgoCD** - GitOps continuous delivery
|
||||
3. **CloudNativePG** - PostgreSQL operator
|
||||
4. **PostgreSQL cluster** - Database for future app migrations
|
||||
|
||||
The deployment follows a bootstrap pattern:
|
||||
- The first two components (Tailscale operator and ArgoCD) are deployed via `kubectl apply -k` (no GitOps yet)
|
||||
- ArgoCD then takes over management of all components including itself
|
||||
- All subsequent deployments use ArgoCD
|
||||
|
||||
---
|
||||
|
||||
## Kubernetes Tags Overview
|
||||
|
||||
| Tag | Purpose | Applied To |
|
||||
|-----|---------|------------|
|
||||
| `tag:k8s-api` | Controls access to the K8s API server | indri (Phase 0.14) |
|
||||
| `tag:k8s-operator` | Identifies the Tailscale K8s Operator | OAuth client for operator |
|
||||
| `tag:k8s` | Default tag for operator-managed resources | Proxies, services, ingresses created by operator |
|
||||
|
||||
**Ownership chain**: `tag:k8s-operator` must own `tag:k8s` so the operator can assign that tag to devices it creates.
|
||||
|
||||
---
|
||||
|
||||
## PostgreSQL Migration Strategy
|
||||
|
||||
The k8s PostgreSQL cluster will eventually replace the brew PostgreSQL on indri.
|
||||
|
||||
| Phase | `pg.tail8d86e.ts.net` points to | Miniflux connects to |
|
||||
|-------|--------------------------------|---------------------|
|
||||
| Current | brew PostgreSQL (indri) | `pg.tail8d86e.ts.net` |
|
||||
| Phase 1 | brew PostgreSQL (indri) | `pg.tail8d86e.ts.net` (no change) |
|
||||
| Phase 4 | brew PostgreSQL (indri) | k8s PG (internal, after miniflux migrates to k8s) |
|
||||
| Post-Phase 4 | k8s PostgreSQL | k8s PG (internal) |
|
||||
| Cleanup | k8s PostgreSQL | k8s PG (internal) |
|
||||
|
||||
This allows zero-downtime migration - the Tailscale service switches after apps are migrated.
|
||||
|
||||
---
|
||||
|
||||
## Steps
|
||||
|
||||
### 1. Update Pulumi ACLs for k8s workloads ✓
|
||||
|
||||
**Status**: Complete
|
||||
|
||||
Added to `pulumi/policy.hujson`:
|
||||
- `tag:k8s-operator` - for the operator OAuth client
|
||||
- `tag:k8s` - for operator-managed resources (owned by `tag:k8s-operator`)
|
||||
- Grant for `tag:k8s` → `tag:registry` access
|
||||
|
||||
---
|
||||
|
||||
### 2. Create Tailscale OAuth client ✓
|
||||
|
||||
**Status**: Complete
|
||||
|
||||
OAuth client stored in 1Password (vault: `vg6xf6vvfmoh5hqjjhlhbeoaie`, item: `2it22lavwgbxdskoaxanej354q`)
|
||||
|
||||
**Configuration used:**
|
||||
- Tags: `tag:k8s-operator`
|
||||
- Devices write scope tag: `tag:k8s`
|
||||
- Scopes: Devices Core (R/W), Auth Keys (R/W), Services (Write)
|
||||
|
||||
---
|
||||
|
||||
### 3. Deploy Tailscale Kubernetes Operator (Bootstrap)
|
||||
|
||||
Deploy via `kubectl apply -k` - will be migrated to ArgoCD management in Step 5.
|
||||
|
||||
**Setup manifests directory:**
|
||||
```bash
|
||||
mkdir -p argocd/manifests/tailscale-operator
|
||||
cd argocd/manifests/tailscale-operator
|
||||
|
||||
# Download static manifest from Tailscale repo
|
||||
curl -sL https://raw.githubusercontent.com/tailscale/tailscale/main/cmd/k8s-operator/deploy/manifests/operator.yaml -o operator.yaml
|
||||
|
||||
# Download CRDs
|
||||
curl -sL https://raw.githubusercontent.com/tailscale/tailscale/main/cmd/k8s-operator/deploy/crds/tailscale.com_connectors.yaml -o crds/connectors.yaml
|
||||
curl -sL https://raw.githubusercontent.com/tailscale/tailscale/main/cmd/k8s-operator/deploy/crds/tailscale.com_proxyclasses.yaml -o crds/proxyclasses.yaml
|
||||
# ... (other CRDs as needed)
|
||||
```
|
||||
|
||||
**Create kustomization.yaml:**
|
||||
```yaml
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
namespace: tailscale-system
|
||||
resources:
|
||||
- operator.yaml
|
||||
secretGenerator:
|
||||
- name: operator-oauth
|
||||
namespace: tailscale-system
|
||||
literals:
|
||||
- client_id=PLACEHOLDER
|
||||
- client_secret=PLACEHOLDER
|
||||
generatorOptions:
|
||||
disableNameSuffixHash: true
|
||||
```
|
||||
|
||||
**Deploy:**
|
||||
```bash
|
||||
# Get credentials from 1Password and create secret manually (kustomize secretGenerator is for reference)
|
||||
CLIENT_ID=$(op --vault vg6xf6vvfmoh5hqjjhlhbeoaie item get 2it22lavwgbxdskoaxanej354q --fields client-id --reveal)
|
||||
CLIENT_SECRET=$(op --vault vg6xf6vvfmoh5hqjjhlhbeoaie item get 2it22lavwgbxdskoaxanej354q --fields client-secret --reveal)
|
||||
|
||||
kubectl create namespace tailscale-system
|
||||
kubectl create secret generic operator-oauth \
|
||||
--namespace tailscale-system \
|
||||
--from-literal=client_id=$CLIENT_ID \
|
||||
--from-literal=client_secret=$CLIENT_SECRET
|
||||
|
||||
# Apply operator manifests
|
||||
kubectl apply -k argocd/manifests/tailscale-operator/
|
||||
```
|
||||
|
||||
**Verification:**
|
||||
```bash
|
||||
kubectl get pods -n tailscale-system
|
||||
# Expected: operator pod Running
|
||||
|
||||
kubectl logs -n tailscale-system -l app.kubernetes.io/name=tailscale-operator
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4. Deploy ArgoCD
|
||||
|
||||
Deploy ArgoCD and expose via Tailscale as `argocd.tail8d86e.ts.net`.
|
||||
|
||||
**Prerequisites:**
|
||||
- Add `tag:argocd` to Pulumi ACLs
|
||||
- Create Tailscale service `argocd` in admin console
|
||||
|
||||
**Setup manifests:**
|
||||
```bash
|
||||
mkdir -p argocd/manifests/argocd
|
||||
|
||||
# Download ArgoCD install manifest
|
||||
curl -sL https://raw.githubusercontent.com/argoproj/argo-cd/stable/manifests/install.yaml -o argocd/manifests/argocd/install.yaml
|
||||
```
|
||||
|
||||
**Create kustomization.yaml:**
|
||||
```yaml
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
namespace: argocd
|
||||
resources:
|
||||
- install.yaml
|
||||
- service-tailscale.yaml # LoadBalancer for Tailscale exposure
|
||||
```
|
||||
|
||||
**Create service-tailscale.yaml:**
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: argocd-server-tailscale
|
||||
namespace: argocd
|
||||
annotations:
|
||||
tailscale.com/hostname: "argocd"
|
||||
spec:
|
||||
type: LoadBalancer
|
||||
loadBalancerClass: tailscale
|
||||
selector:
|
||||
app.kubernetes.io/name: argocd-server
|
||||
ports:
|
||||
- name: https
|
||||
port: 443
|
||||
targetPort: 8080
|
||||
```
|
||||
|
||||
**Deploy:**
|
||||
```bash
|
||||
kubectl create namespace argocd
|
||||
kubectl apply -k argocd/manifests/argocd/
|
||||
```
|
||||
|
||||
**Get initial admin password:**
|
||||
```bash
|
||||
kubectl -n argocd get secret argocd-initial-admin-secret -o jsonpath="{.data.password}" | base64 -d
|
||||
```
|
||||
|
||||
**Verification:**
|
||||
- https://argocd.tail8d86e.ts.net loads
|
||||
- Can login with admin / <initial-password>
|
||||
|
||||
**Post-setup:**
|
||||
1. Change admin password, store in 1Password
|
||||
2. Configure git repo connection to `github.com/eblume/blumeops` (public, no auth needed)
|
||||
- Note: Using GitHub mirror since ArgoCD can't easily reach forge without additional networking
|
||||
|
||||
---
|
||||
|
||||
### 5. Migrate Tailscale Operator to ArgoCD
|
||||
|
||||
Create ArgoCD Application to manage the Tailscale operator.
|
||||
|
||||
**Create argocd/apps/tailscale-operator.yaml:**
|
||||
```yaml
|
||||
apiVersion: argoproj.io/v1alpha1
|
||||
kind: Application
|
||||
metadata:
|
||||
name: tailscale-operator
|
||||
namespace: argocd
|
||||
spec:
|
||||
project: default
|
||||
source:
|
||||
repoURL: https://github.com/eblume/blumeops.git
|
||||
targetRevision: main
|
||||
path: argocd/manifests/tailscale-operator
|
||||
destination:
|
||||
server: https://kubernetes.default.svc
|
||||
namespace: tailscale-system
|
||||
syncPolicy:
|
||||
automated:
|
||||
prune: true
|
||||
selfHeal: true
|
||||
```
|
||||
|
||||
**Apply:**
|
||||
```bash
|
||||
kubectl apply -f argocd/apps/tailscale-operator.yaml
|
||||
```
|
||||
|
||||
**Note on secrets:** The OAuth secret was created manually in Step 3. For GitOps, consider:
|
||||
- Sealed Secrets
|
||||
- External Secrets Operator
|
||||
- SOPS
|
||||
|
||||
For now, the secret remains manually managed outside of ArgoCD.
|
||||
|
||||
---
|
||||
|
||||
### 6. Deploy CloudNativePG via ArgoCD
|
||||
|
||||
**Setup manifests:**
|
||||
```bash
|
||||
mkdir -p argocd/manifests/cloudnative-pg
|
||||
|
||||
# Download CNPG operator manifest
|
||||
curl -sL https://raw.githubusercontent.com/cloudnative-pg/cloudnative-pg/release-1.24/releases/cnpg-1.24.0.yaml -o argocd/manifests/cloudnative-pg/operator.yaml
|
||||
```
|
||||
|
||||
**Create kustomization.yaml:**
|
||||
```yaml
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
resources:
|
||||
- operator.yaml
|
||||
```
|
||||
|
||||
**Create ArgoCD Application (argocd/apps/cloudnative-pg.yaml):**
|
||||
```yaml
|
||||
apiVersion: argoproj.io/v1alpha1
|
||||
kind: Application
|
||||
metadata:
|
||||
name: cloudnative-pg
|
||||
namespace: argocd
|
||||
spec:
|
||||
project: default
|
||||
source:
|
||||
repoURL: https://github.com/eblume/blumeops.git
|
||||
targetRevision: main
|
||||
path: argocd/manifests/cloudnative-pg
|
||||
destination:
|
||||
server: https://kubernetes.default.svc
|
||||
namespace: cnpg-system
|
||||
syncPolicy:
|
||||
automated:
|
||||
prune: true
|
||||
selfHeal: true
|
||||
syncOptions:
|
||||
- CreateNamespace=true
|
||||
```
|
||||
|
||||
**Apply:**
|
||||
```bash
|
||||
kubectl apply -f argocd/apps/cloudnative-pg.yaml
|
||||
```
|
||||
|
||||
**Verification:**
|
||||
```bash
|
||||
kubectl get pods -n cnpg-system
|
||||
# Expected: cnpg-controller-manager Running
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 7. Create PostgreSQL Cluster via ArgoCD
|
||||
|
||||
Create the database cluster. **Not exposed via Tailscale yet** - internal only until apps migrate.
|
||||
|
||||
**Create argocd/manifests/databases/blumeops-pg.yaml:**
|
||||
```yaml
|
||||
apiVersion: postgresql.cnpg.io/v1
|
||||
kind: Cluster
|
||||
metadata:
|
||||
name: blumeops-pg
|
||||
namespace: databases
|
||||
spec:
|
||||
instances: 1
|
||||
storage:
|
||||
size: 10Gi
|
||||
storageClass: standard
|
||||
monitoring:
|
||||
enablePodMonitor: true
|
||||
bootstrap:
|
||||
initdb:
|
||||
database: miniflux
|
||||
owner: miniflux
|
||||
```
|
||||
|
||||
**Create kustomization.yaml:**
|
||||
```yaml
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
namespace: databases
|
||||
resources:
|
||||
- blumeops-pg.yaml
|
||||
```
|
||||
|
||||
**Create ArgoCD Application (argocd/apps/blumeops-pg.yaml):**
|
||||
```yaml
|
||||
apiVersion: argoproj.io/v1alpha1
|
||||
kind: Application
|
||||
metadata:
|
||||
name: blumeops-pg
|
||||
namespace: argocd
|
||||
spec:
|
||||
project: default
|
||||
source:
|
||||
repoURL: https://github.com/eblume/blumeops.git
|
||||
targetRevision: main
|
||||
path: argocd/manifests/databases
|
||||
destination:
|
||||
server: https://kubernetes.default.svc
|
||||
namespace: databases
|
||||
syncPolicy:
|
||||
automated:
|
||||
prune: true
|
||||
selfHeal: true
|
||||
syncOptions:
|
||||
- CreateNamespace=true
|
||||
```
|
||||
|
||||
**Apply:**
|
||||
```bash
|
||||
kubectl apply -f argocd/apps/blumeops-pg.yaml
|
||||
```
|
||||
|
||||
**Verification:**
|
||||
```bash
|
||||
kubectl get cluster -n databases
|
||||
# Expected: blumeops-pg with STATUS "Cluster in healthy state"
|
||||
|
||||
kubectl get pods -n databases
|
||||
# Expected: blumeops-pg-1 Running
|
||||
|
||||
# Get connection secret
|
||||
kubectl -n databases get secret blumeops-pg-app -o jsonpath='{.data.uri}' | base64 -d
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 8. Create App-of-Apps Root Application
|
||||
|
||||
Once all components are deployed, create a root application to manage all apps.
|
||||
|
||||
**Create argocd/apps/root.yaml:**
|
||||
```yaml
|
||||
apiVersion: argoproj.io/v1alpha1
|
||||
kind: Application
|
||||
metadata:
|
||||
name: root
|
||||
namespace: argocd
|
||||
spec:
|
||||
project: default
|
||||
source:
|
||||
repoURL: https://github.com/eblume/blumeops.git
|
||||
targetRevision: main
|
||||
path: argocd/apps
|
||||
destination:
|
||||
server: https://kubernetes.default.svc
|
||||
namespace: argocd
|
||||
syncPolicy:
|
||||
automated:
|
||||
prune: true
|
||||
selfHeal: true
|
||||
```
|
||||
|
||||
**Apply:**
|
||||
```bash
|
||||
kubectl apply -f argocd/apps/root.yaml
|
||||
```
|
||||
|
||||
Now ArgoCD manages itself and all other applications via the app-of-apps pattern.
|
||||
|
||||
---
|
||||
|
||||
## New Files Summary
|
||||
|
||||
```
|
||||
argocd/
|
||||
apps/
|
||||
root.yaml # App-of-apps root
|
||||
tailscale-operator.yaml # Tailscale operator app
|
||||
cloudnative-pg.yaml # CNPG operator app
|
||||
blumeops-pg.yaml # PostgreSQL cluster app
|
||||
manifests/
|
||||
tailscale-operator/
|
||||
kustomization.yaml
|
||||
operator.yaml
|
||||
argocd/
|
||||
kustomization.yaml
|
||||
install.yaml
|
||||
service-tailscale.yaml
|
||||
cloudnative-pg/
|
||||
kustomization.yaml
|
||||
operator.yaml
|
||||
databases/
|
||||
kustomization.yaml
|
||||
blumeops-pg.yaml
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Pulumi ACL Updates Required
|
||||
|
||||
Add to `pulumi/policy.hujson`:
|
||||
```hujson
|
||||
"tag:argocd": ["autogroup:admin", "tag:blumeops"],
|
||||
```
|
||||
|
||||
Add to Erich's test accept list:
|
||||
```hujson
|
||||
"accept": [..., "tag:argocd:443"],
|
||||
```
|
||||
|
||||
Add to Allison's deny list:
|
||||
```hujson
|
||||
"deny": [..., "tag:argocd:443"],
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Verification Checklist
|
||||
|
||||
```bash
|
||||
# 1. Tailscale operator running
|
||||
kubectl get pods -n tailscale-system
|
||||
|
||||
# 2. ArgoCD accessible
|
||||
curl -k https://argocd.tail8d86e.ts.net/healthz
|
||||
|
||||
# 3. CloudNativePG operator running
|
||||
kubectl get pods -n cnpg-system
|
||||
|
||||
# 4. PostgreSQL cluster healthy
|
||||
kubectl get cluster -n databases
|
||||
|
||||
# 5. All ArgoCD apps synced
|
||||
kubectl get applications -n argocd
|
||||
# All should show STATUS: Synced, HEALTH: Healthy
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Rollback
|
||||
|
||||
```bash
|
||||
# Remove ArgoCD apps (managed resources are cascade-deleted only if the Application carries the resources-finalizer.argocd.argoproj.io finalizer)
|
||||
kubectl delete application -n argocd root
|
||||
kubectl delete application -n argocd blumeops-pg
|
||||
kubectl delete application -n argocd cloudnative-pg
|
||||
kubectl delete application -n argocd tailscale-operator
|
||||
|
||||
# Remove ArgoCD
|
||||
kubectl delete -k argocd/manifests/argocd/
|
||||
kubectl delete namespace argocd
|
||||
|
||||
# Remove namespaces
|
||||
kubectl delete namespace databases
|
||||
kubectl delete namespace cnpg-system
|
||||
kubectl delete namespace tailscale-system
|
||||
|
||||
# Revert ACL changes
|
||||
git checkout pulumi/policy.hujson
|
||||
mise run tailnet-up
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Implementation Notes (Deviations from Plan)
|
||||
|
||||
*Added during implementation for retrospective review*
|
||||
|
||||
### Git Source: Forge Instead of GitHub
|
||||
|
||||
**Plan**: Use GitHub mirror (`github.com/eblume/blumeops`)
|
||||
**Actual**: Use internal Forgejo (`ssh://forgejo@indri.tail8d86e.ts.net:2200/eblume/blumeops.git`)
|
||||
|
||||
**Why**: User preference for internal infrastructure, accepting that this creates a circular dependency to resolve later (Forgejo itself is slated to move into k8s in Phase 7).
|
||||
|
||||
**Required changes**:
|
||||
- Deploy key added to forge for ArgoCD SSH access
|
||||
- Repository secret `repo-forge` with SSH private key from 1Password
|
||||
- Discovered: `op read` requires `?ssh-format=openssh` query parameter for ArgoCD-compatible key format
|
||||
- Egress proxy service to reach forge from cluster (targets `indri.tail8d86e.ts.net` not `forge.tail8d86e.ts.net` due to Tailscale Serve limitation)
|
||||
- DNSConfig CRD for cluster-to-tailnet MagicDNS resolution
|
||||
- ACL grant: `tag:k8s` → `tag:homelab` on ports 3001 (HTTP) and 2200 (SSH)
|
||||
|
||||
### ArgoCD Exposure: Ingress Instead of LoadBalancer
|
||||
|
||||
**Plan**: LoadBalancer service with `tailscale.com/hostname` annotation
|
||||
**Actual**: Tailscale Ingress with Let's Encrypt TLS termination
|
||||
|
||||
**Why**: Ingress provides automatic TLS certificates and is the recommended approach.
|
||||
|
||||
**File**: `argocd/manifests/argocd/service-tailscale.yaml` uses `kind: Ingress` with `ingressClassName: tailscale`
|
||||
|
||||
### Namespace: `tailscale` Instead of `tailscale-system`
|
||||
|
||||
**Plan**: `tailscale-system` namespace
|
||||
**Actual**: `tailscale` namespace
|
||||
|
||||
**Why**: Matches upstream Tailscale operator defaults.
|
||||
|
||||
### Sync Policy: Manual Instead of Automated
|
||||
|
||||
**Plan**: `syncPolicy.automated` with prune and selfHeal
|
||||
**Actual**: Manual sync policy for workload apps; auto-sync only for app-of-apps
|
||||
|
||||
**Why**: User preference for explicit control over deployments during initial migration phase.
|
||||
|
||||
**Pattern**:
|
||||
- `apps.yaml` (app-of-apps): auto-sync to pick up new Application manifests
|
||||
- All workload apps: manual sync; apply changes with `argocd app sync <name>`
|
||||
|
||||
### CloudNativePG: Helm Chart Instead of Raw Manifest
|
||||
|
||||
**Plan**: Download raw CNPG manifest
|
||||
**Actual**: Multi-source Application using official Helm chart from `https://cloudnative-pg.github.io/charts`
|
||||
|
||||
**Why**: Helm chart is the officially supported distribution method.
|
||||
|
||||
**Additional fix**: Required `ServerSideApply=true` sync option due to large CRD exceeding annotation size limit.
|
||||
|
||||
### App-of-Apps: Named `apps` Instead of `root`
|
||||
|
||||
**Plan**: `argocd/apps/root.yaml`
|
||||
**Actual**: `argocd/apps/apps.yaml` with Application named `apps`
|
||||
|
||||
**Why**: Clearer naming; `apps` manages apps, `argocd` manages itself.
|
||||
|
||||
### ArgoCD Self-Management Added
|
||||
|
||||
**Plan**: Not explicitly planned
|
||||
**Actual**: `argocd/apps/argocd.yaml` Application for ArgoCD self-management
|
||||
|
||||
**Why**: Standard GitOps pattern - ArgoCD manages its own deployment after bootstrap.
|
||||
|
||||
### CRI-O Registry Mirror for Zot
|
||||
|
||||
**Plan**: Not in original plan
|
||||
**Actual**: Configured CRI-O to use zot as pull-through cache for docker.io, ghcr.io, quay.io
|
||||
|
||||
**Why**: Reduces external bandwidth, speeds up pulls, avoids rate limits.
|
||||
|
||||
**Implementation**: Ansible `minikube` role applies `/etc/containers/registries.conf.d/zot-mirror.conf` inside minikube VM using stable hostname `host.containers.internal:5050`.
|
||||
|
||||
### ProxyClass for CRI-O Image Compatibility
|
||||
|
||||
**Plan**: Not mentioned
|
||||
**Actual**: Required `ProxyClass` with fully-qualified image paths (`docker.io/tailscale/...`)
|
||||
|
||||
**Why**: CRI-O requires fully-qualified image references; default Tailscale operator uses short names.
|
||||
|
||||
### Actual File Structure
|
||||
|
||||
```
|
||||
argocd/
|
||||
apps/
|
||||
apps.yaml # App-of-apps (auto-sync)
|
||||
argocd.yaml # ArgoCD self-management (manual sync)
|
||||
tailscale-operator.yaml # Tailscale operator (manual sync)
|
||||
cloudnative-pg.yaml # CNPG operator via Helm (manual sync)
|
||||
manifests/
|
||||
tailscale-operator/
|
||||
kustomization.yaml
|
||||
operator.yaml
|
||||
proxyclass.yaml # CRI-O compatibility
|
||||
dnsconfig.yaml # Cluster-to-tailnet DNS
|
||||
egress-forge.yaml # Egress proxy for forge
|
||||
secret.yaml.tpl # OAuth secret template (manual)
|
||||
README.md
|
||||
argocd/
|
||||
kustomization.yaml # Uses remote base from upstream
|
||||
service-tailscale.yaml # Ingress (not LoadBalancer)
|
||||
argocd-cmd-params-cm.yaml # Disable HTTPS redirect
|
||||
repo-forge-secret.yaml.tpl # SSH key template (manual)
|
||||
README.md
|
||||
cloudnative-pg/
|
||||
values.yaml # Helm values (currently minimal)
|
||||
README.md
|
||||
```
|
||||
|
||||
### Bootstrap Commands (Actual)
|
||||
|
||||
```bash
|
||||
# 1. Create namespaces
|
||||
kubectl create namespace tailscale
|
||||
kubectl create namespace argocd
|
||||
|
||||
# 2. Apply secrets (manual, uses 1Password)
|
||||
op inject -i argocd/manifests/tailscale-operator/secret.yaml.tpl | kubectl apply -f -
|
||||
|
||||
PRIV_KEY=$(op read "op://vg6xf6vvfmoh5hqjjhlhbeoaie/csjncynh6htjvnh2l2da65y32q/private key?ssh-format=openssh")$'\n' && \
|
||||
kubectl create secret generic repo-forge -n argocd \
|
||||
--from-literal=type=git \
|
||||
--from-literal=url='ssh://forgejo@indri.tail8d86e.ts.net:2200/eblume/blumeops.git' \
|
||||
--from-literal=insecure=true \
|
||||
--from-literal=sshPrivateKey="$PRIV_KEY" && \
|
||||
kubectl label secret repo-forge -n argocd argocd.argoproj.io/secret-type=repository
|
||||
|
||||
# 3. Bootstrap tailscale-operator
|
||||
kubectl apply -k argocd/manifests/tailscale-operator/
|
||||
|
||||
# 4. Bootstrap ArgoCD
|
||||
kubectl apply -k argocd/manifests/argocd/
|
||||
|
||||
# 5. Login and change password
|
||||
argocd login argocd.tail8d86e.ts.net --username admin --grpc-web
|
||||
argocd account update-password
|
||||
|
||||
# 6. Apply ArgoCD Applications
|
||||
kubectl apply -f argocd/apps/argocd.yaml
|
||||
kubectl apply -f argocd/apps/apps.yaml
|
||||
|
||||
# 7. Sync workloads
|
||||
argocd app sync tailscale-operator
|
||||
argocd app sync cloudnative-pg
|
||||
```
|
||||
52
plans/k8s-migration/P2_grafana.md
Normal file
52
plans/k8s-migration/P2_grafana.md
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
# Phase 2: Grafana Migration (Pilot)
|
||||
|
||||
**Goal**: Migrate Grafana as lowest-risk pilot service
|
||||
|
||||
**Status**: Pending
|
||||
|
||||
**Prerequisites**: [Phase 1](P1_k8s_infrastructure.md) complete
|
||||
|
||||
---
|
||||
|
||||
## Steps
|
||||
|
||||
### 1. Deploy Grafana via Helm
|
||||
|
||||
- Copy datasource config from existing role
|
||||
- Copy dashboards from `ansible/roles/grafana/files/dashboards/`
|
||||
- Point to indri Prometheus/Loki (http://indri:9090, http://indri:3100)
|
||||
|
||||
---
|
||||
|
||||
### 2. Configure Tailscale LoadBalancer
|
||||
|
||||
```yaml
|
||||
service:
|
||||
type: LoadBalancer
|
||||
loadBalancerClass: tailscale
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 3. Verify all dashboards work
|
||||
|
||||
---
|
||||
|
||||
### 4. Update tailscale_serve
|
||||
|
||||
Remove grafana entry from `ansible/roles/tailscale_serve/defaults/main.yml`
|
||||
|
||||
---
|
||||
|
||||
### 5. Stop brew grafana
|
||||
|
||||
```bash
|
||||
brew services stop grafana
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Verification
|
||||
|
||||
- https://grafana.tail8d86e.ts.net loads
|
||||
- All dashboards functional
|
||||
55
plans/k8s-migration/P3_postgresql.md
Normal file
55
plans/k8s-migration/P3_postgresql.md
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
# Phase 3: PostgreSQL Migration
|
||||
|
||||
**Goal**: Migrate miniflux database to CloudNativePG
|
||||
|
||||
**Status**: Pending
|
||||
|
||||
**Prerequisites**: [Phase 2](P2_grafana.md) complete
|
||||
|
||||
---
|
||||
|
||||
## Steps
|
||||
|
||||
### 1. Create databases and users in k8s PostgreSQL
|
||||
|
||||
- miniflux database/user
|
||||
- borgmatic read-only user
|
||||
|
||||
---
|
||||
|
||||
### 2. Export from brew PostgreSQL
|
||||
|
||||
```bash
|
||||
pg_dump -h localhost -U miniflux miniflux > miniflux_backup.sql
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 3. Expose k8s PostgreSQL via Tailscale
|
||||
|
||||
- Service with `loadBalancerClass: tailscale`
|
||||
- Tag: `svc:pg-k8s`
|
||||
|
||||
---
|
||||
|
||||
### 4. Import data
|
||||
|
||||
```bash
|
||||
psql -h pg-k8s.tail8d86e.ts.net -U miniflux miniflux < miniflux_backup.sql
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 5. Update borgmatic config
|
||||
|
||||
- Change the database hostname to the k8s PostgreSQL instance (`pg-k8s.tail8d86e.ts.net`)
|
||||
|
||||
---
|
||||
|
||||
### 6. Verify data integrity
|
||||
|
||||
---
|
||||
|
||||
## Rollback
|
||||
|
||||
Keep brew PostgreSQL running until [Phase 4](P4_miniflux.md) is verified.
|
||||
48
plans/k8s-migration/P4_miniflux.md
Normal file
48
plans/k8s-migration/P4_miniflux.md
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
# Phase 4: Miniflux Migration
|
||||
|
||||
**Goal**: Migrate Miniflux to k8s
|
||||
|
||||
**Status**: Pending
|
||||
|
||||
**Prerequisites**: [Phase 3](P3_postgresql.md) complete
|
||||
|
||||
---
|
||||
|
||||
## Steps
|
||||
|
||||
### 1. Deploy Miniflux
|
||||
|
||||
```yaml
|
||||
image: ghcr.io/miniflux/miniflux:latest
|
||||
env:
|
||||
DATABASE_URL: from secret
|
||||
RUN_MIGRATIONS: "1"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 2. Configure Tailscale LoadBalancer
|
||||
|
||||
Tag: `svc:feed`
|
||||
|
||||
---
|
||||
|
||||
### 3. Update Alloy log collection
|
||||
|
||||
Add the Miniflux k8s namespace to the Alloy log collection config
|
||||
|
||||
---
|
||||
|
||||
### 4. Verify
|
||||
|
||||
- Login works
|
||||
- Feeds refresh
|
||||
- API works
|
||||
|
||||
---
|
||||
|
||||
### 5. Stop brew miniflux
|
||||
|
||||
```bash
|
||||
brew services stop miniflux
|
||||
```
|
||||
37
plans/k8s-migration/P5_devpi.md
Normal file
37
plans/k8s-migration/P5_devpi.md
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
# Phase 5: devpi Migration
|
||||
|
||||
**Goal**: Migrate devpi to k8s
|
||||
|
||||
**Status**: Pending
|
||||
|
||||
**Prerequisites**: [Phase 4](P4_miniflux.md) complete
|
||||
|
||||
---
|
||||
|
||||
## Steps
|
||||
|
||||
### 1. Build devpi container
|
||||
|
||||
- Dockerfile with devpi-server + devpi-web
|
||||
- Push to local Zot registry
|
||||
|
||||
---
|
||||
|
||||
### 2. Deploy as StatefulSet
|
||||
|
||||
- PVC for data (50Gi)
|
||||
- Migrate existing data (excluding PyPI cache)
|
||||
|
||||
---
|
||||
|
||||
### 3. Configure Tailscale LoadBalancer
|
||||
|
||||
Tag: `svc:pypi`
|
||||
|
||||
---
|
||||
|
||||
### 4. Update pip.conf on gilbert
|
||||
|
||||
---
|
||||
|
||||
### 5. Stop mcquack devpi
|
||||
35
plans/k8s-migration/P6_kiwix.md
Normal file
35
plans/k8s-migration/P6_kiwix.md
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
# Phase 6: Kiwix Migration
|
||||
|
||||
**Goal**: Migrate kiwix-serve to k8s
|
||||
|
||||
**Status**: Pending
|
||||
|
||||
**Prerequisites**: [Phase 5](P5_devpi.md) complete
|
||||
|
||||
---
|
||||
|
||||
## Steps
|
||||
|
||||
### 1. Create NFS/hostPath PV for ZIM files
|
||||
|
||||
- Point to transmission download directory
|
||||
- ReadOnlyMany access
|
||||
|
||||
---
|
||||
|
||||
### 2. Deploy Kiwix
|
||||
|
||||
```yaml
|
||||
image: ghcr.io/kiwix/kiwix-serve:3.8.1
|
||||
args: ["/data/*.zim"]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 3. Configure Tailscale LoadBalancer
|
||||
|
||||
Tag: `svc:kiwix`
|
||||
|
||||
---
|
||||
|
||||
### 4. Stop mcquack kiwix-serve
|
||||
51
plans/k8s-migration/P7_forgejo.md
Normal file
51
plans/k8s-migration/P7_forgejo.md
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
# Phase 7: Forgejo Migration (Highest Risk)
|
||||
|
||||
**Goal**: Migrate Forgejo to k8s
|
||||
|
||||
**Status**: Pending
|
||||
|
||||
**Prerequisites**: [Phase 6](P6_kiwix.md) complete
|
||||
|
||||
---
|
||||
|
||||
## Pre-Migration Checklist
|
||||
|
||||
- [ ] Full borgmatic backup verified
|
||||
- [ ] Manual backup of `/opt/homebrew/var/forgejo`
|
||||
- [ ] Document SSH keys and webhooks
|
||||
|
||||
---
|
||||
|
||||
## Steps
|
||||
|
||||
### 1. Deploy Forgejo via Helm
|
||||
|
||||
```bash
|
||||
helm install forgejo forgejo/forgejo \
|
||||
--namespace forgejo --create-namespace
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 2. Migrate data
|
||||
|
||||
- Stop brew forgejo
|
||||
- Copy data to PVC
|
||||
- Start k8s forgejo
|
||||
|
||||
---
|
||||
|
||||
### 3. Configure Tailscale services
|
||||
|
||||
- HTTPS 443 via LoadBalancer
|
||||
- SSH port 22 (TCP proxy)
|
||||
|
||||
---
|
||||
|
||||
### 4. Verify all repositories accessible
|
||||
|
||||
---
|
||||
|
||||
## Rollback
|
||||
|
||||
Restore brew forgejo and tailscale serve config
|
||||
32
plans/k8s-migration/P8_woodpecker.md
Normal file
32
plans/k8s-migration/P8_woodpecker.md
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
# Phase 8: CI/CD (Woodpecker)
|
||||
|
||||
**Goal**: Deploy Woodpecker CI integrated with Forgejo
|
||||
|
||||
**Status**: Pending
|
||||
|
||||
**Prerequisites**: [Phase 7](P7_forgejo.md) complete
|
||||
|
||||
---
|
||||
|
||||
## Steps
|
||||
|
||||
### 1. Create Forgejo OAuth application
|
||||
|
||||
- Callback: https://ci.tail8d86e.ts.net/authorize
|
||||
- Store in 1Password
|
||||
|
||||
---
|
||||
|
||||
### 2. Deploy Woodpecker Server + Agent
|
||||
|
||||
---
|
||||
|
||||
### 3. Configure Tailscale LoadBalancer
|
||||
|
||||
Tag: `svc:ci`
|
||||
|
||||
---
|
||||
|
||||
### 4. Test pipeline
|
||||
|
||||
Create `.woodpecker.yaml` in test repo
|
||||
52
plans/k8s-migration/P9_cleanup.md
Normal file
52
plans/k8s-migration/P9_cleanup.md
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
# Phase 9: Cleanup
|
||||
|
||||
**Goal**: Remove deprecated services, harden system
|
||||
|
||||
**Status**: Pending
|
||||
|
||||
**Prerequisites**: [Phase 8](P8_woodpecker.md) complete
|
||||
|
||||
---
|
||||
|
||||
## Steps
|
||||
|
||||
### 1. Stop/remove unused brew services
|
||||
|
||||
- postgresql@18
|
||||
- grafana
|
||||
- miniflux
|
||||
- forgejo
|
||||
|
||||
---
|
||||
|
||||
### 2. Update ansible playbook
|
||||
|
||||
- Remove migrated service roles
|
||||
- Add k8s deployment references
|
||||
|
||||
---
|
||||
|
||||
### 3. Configure Velero backups (optional)
|
||||
|
||||
- Install with MinIO on sifaka
|
||||
- Schedule daily cluster backups
|
||||
|
||||
---
|
||||
|
||||
### 4. Update zk documentation
|
||||
|
||||
- New architecture
|
||||
- Runbooks
|
||||
- DR procedures
|
||||
|
||||
---
|
||||
|
||||
## Plan Completion
|
||||
|
||||
When all phases are complete and verified:
|
||||
|
||||
```bash
|
||||
# Rename this folder to indicate completion
|
||||
git mv plans/k8s-migration plans/k8s-migration.complete
|
||||
git commit -m "Complete k8s migration plan"
|
||||
```
|
||||
|
|
@ -59,6 +59,21 @@
|
|||
"dst": ["tag:nas"],
|
||||
"ip": ["*"],
|
||||
},
|
||||
|
||||
// --- Kubernetes workloads ---
|
||||
// k8s workloads (e.g., Woodpecker CI) can push/pull from registry
|
||||
{
|
||||
"src": ["tag:k8s"],
|
||||
"dst": ["tag:registry"],
|
||||
"ip": ["tcp:443"],
|
||||
},
|
||||
// k8s workloads (e.g., ArgoCD) can access forge on indri for GitOps
|
||||
// HTTP on 3001, SSH on 2200
|
||||
{
|
||||
"src": ["tag:k8s"],
|
||||
"dst": ["tag:homelab"],
|
||||
"ip": ["tcp:3001", "tcp:2200"],
|
||||
},
|
||||
],
|
||||
|
||||
// ============== SSH Access ==============
|
||||
|
|
@ -103,6 +118,8 @@
|
|||
"tag:feed": ["autogroup:admin", "tag:blumeops"],
|
||||
"tag:registry": ["autogroup:admin", "tag:blumeops"],
|
||||
"tag:k8s-api": ["autogroup:admin", "tag:blumeops"],
|
||||
"tag:k8s-operator": ["autogroup:admin", "tag:blumeops"],
|
||||
"tag:k8s": ["autogroup:admin", "tag:blumeops", "tag:k8s-operator"],
|
||||
},
|
||||
|
||||
// ============== ACL Tests ==============
|
||||
|
|
@ -123,5 +140,10 @@
|
|||
"src": "tag:homelab",
|
||||
"accept": ["tag:homelab:22", "tag:nas:445"],
|
||||
},
|
||||
// K8s workloads can reach registry and forge (on indri:3001 HTTP, :2200 SSH)
|
||||
{
|
||||
"src": "tag:k8s",
|
||||
"accept": ["tag:registry:443", "tag:homelab:3001", "tag:homelab:2200"],
|
||||
},
|
||||
],
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue