Migrate observability stack to Kubernetes (#42)
Note: the name of this branch was chosen before the scope widened to encompass the entire observability stack.

## Summary

- Fix Grafana data source URLs (the docker driver uses `host.minikube.internal`, not `host.containers.internal`)
- Migrate Prometheus and Loki from indri to Kubernetes with Tailscale Ingresses
- Expose CNPG PostgreSQL metrics via Tailscale and update the dashboard to use `cnpg_*` metrics
- Update Alloy to push metrics/logs to the k8s endpoints (`prometheus.tail8d86e.ts.net`, `loki.tail8d86e.ts.net`)
- Add an ACL rule for port 9187 (CNPG metrics)
- Delete the obsolete ansible roles for prometheus and loki

## Changes

- `argocd/manifests/prometheus/` - New Prometheus StatefulSet with 20Gi PVC and Tailscale Ingress
- `argocd/manifests/loki/` - New Loki StatefulSet with 20Gi PVC and Tailscale Ingress
- `argocd/apps/prometheus.yaml`, `argocd/apps/loki.yaml` - ArgoCD Applications
- `argocd/manifests/grafana/values.yaml` - Data sources now use k8s internal DNS
- `argocd/manifests/databases/service-metrics-tailscale.yaml` - CNPG metrics endpoint
- `argocd/manifests/grafana-config/dashboards/configmap-postgresql.yaml` - Updated to `cnpg_*` metrics
- `ansible/roles/alloy/defaults/main.yml` - Push to k8s Tailscale endpoints
- `pulumi/policy.hujson` - ACL for port 9187
- Deleted `ansible/roles/prometheus/` and `ansible/roles/loki/`

## Deployment and Testing

- Stop prometheus and loki on indri
- Sync ArgoCD apps (apps, prometheus, loki, grafana)
- Run `mise run provision-indri -- --tags alloy`
- Verify Grafana dashboards show data

🤖 Generated with https://claude.ai/claude-code

Reviewed-on: https://forge.tail8d86e.ts.net/eblume/blumeops/pulls/42
This commit is contained in:
parent
5a829e0afd
commit
17023085cb
36 changed files with 569 additions and 270 deletions
|
|
@ -54,7 +54,7 @@ data:
|
|||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "pg_up",
|
||||
"expr": "cnpg_collector_up",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
|
|
@ -95,7 +95,7 @@ data:
|
|||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "pg_stat_activity_count{state=\"active\"}",
|
||||
"expr": "cnpg_backends_total{state=\"active\"}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
|
|
@ -136,7 +136,7 @@ data:
|
|||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(pg_stat_activity_count)",
|
||||
"expr": "sum(cnpg_backends_total)",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
|
|
@ -177,7 +177,7 @@ data:
|
|||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(pg_database_size_bytes)",
|
||||
"expr": "sum(cnpg_pg_database_size_bytes)",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
|
|
@ -249,7 +249,7 @@ data:
|
|||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "pg_stat_activity_count",
|
||||
"expr": "cnpg_backends_total",
|
||||
"legendFormat": "{{state}}",
|
||||
"refId": "A"
|
||||
}
|
||||
|
|
@ -322,7 +322,7 @@ data:
|
|||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "pg_database_size_bytes{datname!~\"template.*\"}",
|
||||
"expr": "cnpg_pg_database_size_bytes{datname!~\"template.*\"}",
|
||||
"legendFormat": "{{datname}}",
|
||||
"refId": "A"
|
||||
}
|
||||
|
|
@ -395,22 +395,22 @@ data:
|
|||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(pg_stat_database_tup_fetched{datname!~\"template.*\"}[5m])",
|
||||
"expr": "rate(cnpg_pg_stat_database_tup_fetched{datname!~\"template.*\"}[5m])",
|
||||
"legendFormat": "{{datname}} fetched",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "rate(pg_stat_database_tup_inserted{datname!~\"template.*\"}[5m])",
|
||||
"expr": "rate(cnpg_pg_stat_database_tup_inserted{datname!~\"template.*\"}[5m])",
|
||||
"legendFormat": "{{datname}} inserted",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "rate(pg_stat_database_tup_updated{datname!~\"template.*\"}[5m])",
|
||||
"expr": "rate(cnpg_pg_stat_database_tup_updated{datname!~\"template.*\"}[5m])",
|
||||
"legendFormat": "{{datname}} updated",
|
||||
"refId": "C"
|
||||
},
|
||||
{
|
||||
"expr": "rate(pg_stat_database_tup_deleted{datname!~\"template.*\"}[5m])",
|
||||
"expr": "rate(cnpg_pg_stat_database_tup_deleted{datname!~\"template.*\"}[5m])",
|
||||
"legendFormat": "{{datname}} deleted",
|
||||
"refId": "D"
|
||||
}
|
||||
|
|
@ -483,12 +483,12 @@ data:
|
|||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(pg_stat_database_xact_commit{datname!~\"template.*\"}[5m])",
|
||||
"expr": "rate(cnpg_pg_stat_database_xact_commit{datname!~\"template.*\"}[5m])",
|
||||
"legendFormat": "{{datname}} commits",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "rate(pg_stat_database_xact_rollback{datname!~\"template.*\"}[5m])",
|
||||
"expr": "rate(cnpg_pg_stat_database_xact_rollback{datname!~\"template.*\"}[5m])",
|
||||
"legendFormat": "{{datname}} rollbacks",
|
||||
"refId": "B"
|
||||
}
|
||||
|
|
@ -561,7 +561,7 @@ data:
|
|||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "pg_database_xid_age_xid_age",
|
||||
"expr": "cnpg_pg_database_xid_age",
|
||||
"legendFormat": "{{datname}}",
|
||||
"refId": "A"
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue