Add Forgejo repository health metrics and Grafana dashboard (#245)
## Summary - New `forgejo_metrics` Ansible role that queries the Forgejo REST API every 60s and writes Prometheus textfile metrics (open PRs, issues, languages, releases, commits, Actions runs/duration/success) - Grafana dashboard "Forgejo Repository Health" with 12 panels across 4 rows: overview stats, CI/CD health, repository info, and staleness tracking - Deletes superseded `forgejo-actions-dashboard` plan doc (this implementation covers a broader scope) ## Deployment and Testing - [ ] `mise run provision-indri -- --tags forgejo_metrics` to deploy the collector - [ ] `ssh indri 'cat /opt/homebrew/var/node_exporter/textfile/forgejo.prom'` to verify metrics - [ ] `argocd app sync grafana-config` to deploy the dashboard - [ ] Check Grafana dashboard "Forgejo Repository Health" loads with data - [ ] `mise run services-check` passes Reviewed-on: https://forge.ops.eblu.me/eblume/blumeops/pulls/245
This commit is contained in:
parent
c21cf54847
commit
2c081eed28
12 changed files with 989 additions and 201 deletions
20
ansible/roles/forgejo_metrics/defaults/main.yml
Normal file
20
ansible/roles/forgejo_metrics/defaults/main.yml
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
---
# Default variables for the forgejo_metrics role.

# Base URL of the Forgejo server; the collector script appends /api/v1/...
forgejo_metrics_url: 'http://localhost:3001'

# Install location of the collector script and the file that holds its API
# token. The role writes the token file with mode 0600.
forgejo_metrics_script: '/Users/erichblume/.local/bin/forgejo-metrics'
forgejo_metrics_api_key_file: '/Users/erichblume/.forgejo-api-key'

# Seconds between collector runs (rendered as the LaunchAgent StartInterval).
forgejo_metrics_interval: 60

# Prometheus textfile collector directory; the script writes forgejo.prom here.
forgejo_metrics_dir: '/opt/homebrew/var/node_exporter/textfile'

# Directory that receives the LaunchAgent's stdout/stderr log files.
forgejo_metrics_log_dir: '/opt/homebrew/var/log'
|
||||
6
ansible/roles/forgejo_metrics/handlers/main.yml
Normal file
6
ansible/roles/forgejo_metrics/handlers/main.yml
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
---
# Bounce the collector's LaunchAgent so launchd re-reads the plist and script.
- name: Reload forgejo-metrics
  ansible.builtin.shell:
    cmd: |
      plist=~/Library/LaunchAgents/mcquack.eblume.forgejo-metrics.plist
      # Unload is best-effort: it fails harmlessly when the agent is not loaded.
      launchctl unload "$plist" 2>/dev/null || true
      launchctl load "$plist"
  changed_when: true
|
||||
55
ansible/roles/forgejo_metrics/tasks/main.yml
Normal file
55
ansible/roles/forgejo_metrics/tasks/main.yml
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
---
# Token provisioning.
# When forgejo_metrics_api_key has not been supplied by the caller, read it on
# the control machine with `op read`, expose it as a fact, then write it to the
# key file that the collector script reads.

- name: Fetch Forgejo API token (when running with --tags forgejo_metrics)
  when: forgejo_metrics_api_key is not defined
  delegate_to: localhost
  ansible.builtin.command:
    argv:
      - op
      - read
      - "op://vg6xf6vvfmoh5hqjjhlhbeoaie/w3663ffnvkewbftncqxtcpeavy/api-token"
  register: forgejo_metrics_api_key_fallback
  # Read-only lookup: never reports a change and also runs in check mode.
  changed_when: false
  check_mode: false
  no_log: true

- name: Set Forgejo API token fact (fallback)
  when: forgejo_metrics_api_key is not defined
  ansible.builtin.set_fact:
    forgejo_metrics_api_key: "{{ forgejo_metrics_api_key_fallback.stdout }}"
  no_log: true

- name: Write Forgejo API token file
  ansible.builtin.copy:
    dest: "{{ forgejo_metrics_api_key_file }}"
    content: "{{ forgejo_metrics_api_key }}"
    # Owner-only access: the file contains a secret.
    mode: "0600"
  no_log: true
|
||||
|
||||
# Install the collector script and its LaunchAgent definition. Either template
# changing triggers the "Reload forgejo-metrics" handler.

- name: Ensure bin directory exists
  ansible.builtin.file:
    state: directory
    path: "{{ forgejo_metrics_script | dirname }}"
    mode: "0755"

- name: Deploy forgejo metrics collection script
  ansible.builtin.template:
    dest: "{{ forgejo_metrics_script }}"
    src: forgejo-metrics.sh.j2
    mode: "0755"
  notify:
    - Reload forgejo-metrics

- name: Deploy forgejo-metrics LaunchAgent plist
  ansible.builtin.template:
    dest: '~/Library/LaunchAgents/mcquack.eblume.forgejo-metrics.plist'
    src: forgejo-metrics.plist.j2
    mode: "0644"
  notify:
    - Reload forgejo-metrics
|
||||
|
||||
# Make sure the LaunchAgent is loaded even when neither template changed (in
# that case the reload handler is never notified).

# Read-only probe: `launchctl list <label>` exits non-zero when the job is not
# loaded, so the exit code is used as the "is loaded" signal and never treated
# as a failure.
# check_mode: false forces the probe to run under --check as well. Without it
# Ansible does not execute the command in check mode, and the condition on the
# next task would be evaluated against a placeholder result instead of the
# real launchd state.
- name: Check if forgejo-metrics LaunchAgent is loaded
  ansible.builtin.command:
    cmd: launchctl list mcquack.eblume.forgejo-metrics
  register: forgejo_metrics_launchctl_check
  changed_when: false
  failed_when: false
  check_mode: false

# Best-effort load; a failure here is deliberately not fatal.
- name: Load forgejo-metrics LaunchAgent if not loaded
  ansible.builtin.command:
    cmd: launchctl load ~/Library/LaunchAgents/mcquack.eblume.forgejo-metrics.plist
  when: forgejo_metrics_launchctl_check.rc != 0
  changed_when: true
  failed_when: false
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
<!-- {{ ansible_managed }} -->
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
    <!-- Job identity; must match the label used by the role's launchctl tasks -->
    <key>Label</key>
    <string>mcquack.eblume.forgejo-metrics</string>

    <!-- Program to run and the PATH it runs with -->
    <key>ProgramArguments</key>
    <array>
        <string>{{ forgejo_metrics_script }}</string>
    </array>
    <key>EnvironmentVariables</key>
    <dict>
        <key>PATH</key>
        <string>/opt/homebrew/bin:/usr/bin:/bin</string>
    </dict>

    <!-- Schedule: run once at load, then every forgejo_metrics_interval seconds -->
    <key>RunAtLoad</key>
    <true/>
    <key>StartInterval</key>
    <integer>{{ forgejo_metrics_interval }}</integer>

    <!-- Log files for the script's stdout and stderr -->
    <key>StandardOutPath</key>
    <string>{{ forgejo_metrics_log_dir }}/mcquack.forgejo-metrics.out.log</string>
    <key>StandardErrorPath</key>
    <string>{{ forgejo_metrics_log_dir }}/mcquack.forgejo-metrics.err.log</string>
</dict>
</plist>
|
||||
162
ansible/roles/forgejo_metrics/templates/forgejo-metrics.sh.j2
Normal file
162
ansible/roles/forgejo_metrics/templates/forgejo-metrics.sh.j2
Normal file
|
|
@ -0,0 +1,162 @@
|
|||
#!/bin/bash
# {{ ansible_managed }}
#
# Forgejo repository health collector.
# Queries the Forgejo REST API and writes Prometheus metrics for the
# node_exporter textfile collector.

set -euo pipefail

# --- Settings rendered by Ansible ---
FORGEJO_URL="{{ forgejo_metrics_url }}"
API_KEY_FILE="{{ forgejo_metrics_api_key_file }}"

# Metrics are written to TEMP_FILE first and moved over OUTPUT_FILE at the end.
OUTPUT_FILE="{{ forgejo_metrics_dir }}/forgejo.prom"
TEMP_FILE="${OUTPUT_FILE}.tmp"

# API token with newlines stripped. A missing or unreadable key file is not an
# error here: TOKEN simply ends up empty.
TOKEN=$( { cat "$API_KEY_FILE" 2>/dev/null || true; } | tr -d '\n' ) || true
|
||||
|
||||
# Authenticated GET against the Forgejo REST API.
#   $1  path below /api/v1, including the leading slash and any query string
# Prints the response body on success. On any failure (HTTP error, connection
# error, timeout) prints an empty string and still returns 0, so callers only
# need to test for empty output.
# curl has no overall time limit by default; the explicit timeouts keep one
# hung connection from blocking the whole collection run.
api() {
  curl -sf --connect-timeout 5 --max-time 30 \
    -H "Authorization: token ${TOKEN}" -H "Accept: application/json" \
    "${FORGEJO_URL}/api/v1${1}" 2>/dev/null || echo ""
}
|
||||
|
||||
# jq helper: convert ISO 8601 timestamp (with any tz offset) to epoch seconds
# jq's fromdate only handles Z, so we parse the offset and apply it manually
#
# Defines a jq function `epoch`; the string is prepended to jq programs that
# need it. Steps performed on the input string:
#   1. remove a fractional-seconds part (a dot followed by digits), if any;
#   2. if it ends in +HH:MM or -HH:MM, parse the remainder as if it were UTC,
#      then subtract the offset for "+" or add it for "-";
#   3. otherwise make sure it ends in exactly one "Z" and pass it to fromdate.
JQ_EPOCH='def epoch: sub("[.][0-9]+"; "") | if test("[+-][0-9]{2}:[0-9]{2}$") then capture("^(?<dt>.*)(?<sign>[+-])(?<h>[0-9]{2}):(?<m>[0-9]{2})$") | (.dt + "Z" | fromdate) as $base | ((.h | tonumber) * 3600 + (.m | tonumber) * 60) as $off | if .sign == "-" then $base + $off else $base - $off end else sub("Z$"; "") + "Z" | fromdate end;'
|
||||
|
||||
# Liveness probe: forgejo_up becomes 1 only when the unauthenticated version
# endpoint answers successfully. The timeouts bound the probe so that an
# unresponsive server is reported as down instead of hanging the script.
forgejo_up=0
if curl -sf --connect-timeout 5 --max-time 10 \
  "${FORGEJO_URL}/api/v1/version" >/dev/null 2>&1; then
  forgejo_up=1
fi
|
||||
|
||||
# Everything echoed inside this group is captured into TEMP_FILE, which is
# moved over OUTPUT_FILE at the very end so a reader never sees a partially
# written metrics file.
{
  # --- Metric type declarations ---
  cat << 'HEADER'
# HELP forgejo_up Forgejo server is up and responding
# TYPE forgejo_up gauge
# HELP forgejo_repo_open_pull_requests Number of open pull requests
# TYPE forgejo_repo_open_pull_requests gauge
# HELP forgejo_repo_open_issues Number of open issues
# TYPE forgejo_repo_open_issues gauge
# HELP forgejo_repo_language_bytes Repository language size in bytes
# TYPE forgejo_repo_language_bytes gauge
# HELP forgejo_repo_releases_total Total number of releases
# TYPE forgejo_repo_releases_total gauge
# HELP forgejo_repo_latest_release_timestamp_seconds Unix timestamp of the latest release
# TYPE forgejo_repo_latest_release_timestamp_seconds gauge
# HELP forgejo_repo_latest_commit_timestamp_seconds Unix timestamp of the latest commit on default branch
# TYPE forgejo_repo_latest_commit_timestamp_seconds gauge
# HELP forgejo_actions_runs_total Action runs by status from most recent 30
# TYPE forgejo_actions_runs_total gauge
# HELP forgejo_actions_run_duration_seconds Duration of the latest completed run per workflow in seconds
# TYPE forgejo_actions_run_duration_seconds gauge
# HELP forgejo_actions_last_success_timestamp_seconds Unix timestamp of last successful run per workflow
# TYPE forgejo_actions_last_success_timestamp_seconds gauge
# HELP forgejo_actions_jobs_waiting Number of action runs currently waiting or queued
# TYPE forgejo_actions_jobs_waiting gauge
# HELP forgejo_actions_jobs_running Number of action runs currently in progress
# TYPE forgejo_actions_jobs_running gauge
HEADER

  echo "forgejo_up ${forgejo_up}"

  # Per-repo metrics need both a reachable server and a token.
  if [ "$forgejo_up" -eq 1 ] && [ -n "$TOKEN" ]; then
    # Discover all repos accessible to the token owner
    repos_json=$(api "/repos/search?limit=50")
    [ -z "$repos_json" ] && repos_json='{"data":[]}'

    repo_count=$(echo "$repos_json" | jq '.data | length' 2>/dev/null || echo "0")

    # C-style loop instead of `seq 0 $((repo_count - 1))`: BSD/macOS seq counts
    # DOWN when first > last, so an empty repo list would have produced the
    # bogus indices 0 and -1 rather than no iterations.
    for ((i = 0; i < repo_count; i++)); do
      repo_data=$(echo "$repos_json" | jq ".data[$i]")
      full_name=$(echo "$repo_data" | jq -r '.full_name')
      # Skip entries without a usable owner/name.
      if [ -z "$full_name" ] || [ "$full_name" = "null" ]; then
        continue
      fi

      r="$full_name"

      # Basic repo metrics (from search results — no extra API call)
      echo "forgejo_repo_open_pull_requests{repo=\"${r}\"} $(echo "$repo_data" | jq '.open_pr_counter // 0')"
      echo "forgejo_repo_open_issues{repo=\"${r}\"} $(echo "$repo_data" | jq '.open_issues_count // 0')"

      default_branch=$(echo "$repo_data" | jq -r '.default_branch // "main"')

      # --- Languages ---
      # One sample per language; skipped when the response is empty or not a
      # non-empty JSON object.
      langs=$(api "/repos/${r}/languages")
      if [ -n "$langs" ] && echo "$langs" | jq -e 'type == "object" and length > 0' >/dev/null 2>&1; then
        echo "$langs" | jq -r --arg r "$r" \
          'to_entries[] | "forgejo_repo_language_bytes{repo=\"\($r)\",language=\"\(.key)\"} \(.value)"' \
          2>/dev/null || true
      fi

      # --- Releases ---
      # The count reflects at most the 50 releases returned by this request.
      releases=$(api "/repos/${r}/releases?limit=50")
      if [ -n "$releases" ] && echo "$releases" | jq -e 'type == "array"' >/dev/null 2>&1; then
        echo "forgejo_repo_releases_total{repo=\"${r}\"} $(echo "$releases" | jq 'length')"
        # Latest release timestamp and version
        echo "$releases" | jq -r --arg r "$r" "${JQ_EPOCH}"'
          if length > 0 then
            .[0] |
            "forgejo_repo_latest_release_timestamp_seconds{repo=\"\($r)\",version=\"\(.tag_name)\"} \((.published_at // .created_at // .created) | epoch)"
          else empty end' 2>/dev/null || true
      else
        echo "forgejo_repo_releases_total{repo=\"${r}\"} 0"
      fi

      # --- Latest commit on default branch ---
      commits=$(api "/repos/${r}/commits?limit=1&sha=${default_branch}")
      if [ -n "$commits" ] && echo "$commits" | jq -e 'type == "array" and length > 0' >/dev/null 2>&1; then
        echo "$commits" | jq -r --arg r "$r" "${JQ_EPOCH}"'
          .[0] |
          "forgejo_repo_latest_commit_timestamp_seconds{repo=\"\($r)\"} \((.created // .commit.committer.date) | epoch)"' \
          2>/dev/null || true
      fi

      # --- Action runs ---
      runs_json=$(api "/repos/${r}/actions/runs?limit=30")
      if [ -n "$runs_json" ] && echo "$runs_json" | jq -e '.workflow_runs | type == "array"' >/dev/null 2>&1; then
        # Count by status
        echo "$runs_json" | jq -r --arg r "$r" '
          .workflow_runs | group_by(.status) | .[] |
          "forgejo_actions_runs_total{repo=\"\($r)\",status=\"\(.[0].status)\"} \(length)"' \
          2>/dev/null || true

        # Jobs waiting/running
        waiting=$(echo "$runs_json" | jq '[.workflow_runs[] | select(.status == "waiting" or .status == "queued")] | length' 2>/dev/null || echo "0")
        running=$(echo "$runs_json" | jq '[.workflow_runs[] | select(.status == "running")] | length' 2>/dev/null || echo "0")
        echo "forgejo_actions_jobs_waiting{repo=\"${r}\"} ${waiting}"
        echo "forgejo_actions_jobs_running{repo=\"${r}\"} ${running}"

        # Discover current workflow files on the default branch (.forgejo/ or .github/)
        # The first directory that returns a listing wins; file names are
        # joined with commas for the jq program below.
        current_wfs=""
        for wf_dir in .forgejo/workflows .github/workflows; do
          wf_list=$(api "/repos/${r}/contents/${wf_dir}?ref=${default_branch}")
          if [ -n "$wf_list" ] && echo "$wf_list" | jq -e 'type == "array"' >/dev/null 2>&1; then
            current_wfs=$(echo "$wf_list" | jq -r '[.[].name] | join(",")' 2>/dev/null || true)
            break
          fi
        done

        # Per-workflow: latest completed run duration and last success timestamp
        # Only include workflows that currently exist on the default branch
        # Forgejo fields: workflow_id (filename), created/stopped, duration (nanoseconds)
        if [ -n "$current_wfs" ]; then
          echo "$runs_json" | jq -r --arg r "$r" --arg wfs "$current_wfs" "${JQ_EPOCH}"'
            ($wfs | split(",")) as $current |
            [.workflow_runs[] | select((.status == "success" or .status == "failure") and (.workflow_id | IN($current[])))] |
            if length > 0 then
              group_by(.workflow_id) | .[] |
              (sort_by(.created) | reverse) as $sorted |
              ($sorted[0]) as $latest |
              ($latest.workflow_id | sub("[.]ya?ml$"; "")) as $wf |
              "forgejo_actions_run_duration_seconds{repo=\"\($r)\",workflow=\"\($wf)\"} \(($latest.duration // 0) / 1000000000 | floor)",
              ([$sorted[] | select(.status == "success")] |
                if length > 0 then
                  .[0] as $last_ok |
                  "forgejo_actions_last_success_timestamp_seconds{repo=\"\($r)\",workflow=\"\($wf)\"} \($last_ok.stopped | epoch)"
                else empty end)
            else empty end' 2>/dev/null || true
        fi
      fi
    done
  fi
} > "$TEMP_FILE"

# Atomic move
mv "$TEMP_FILE" "$OUTPUT_FILE"
|
||||
Loading…
Add table
Add a link
Reference in a new issue