Add Grafana Alloy and Loki for unified observability (#11)
## Summary
- Add Grafana Alloy to replace node_exporter for metrics collection
- Add Loki for log aggregation and storage
- Configure Alloy to collect logs from all services (grafana, forgejo, prometheus, tailscale, transmission, devpi, kiwix, borgmatic)
- Update Prometheus to accept metrics via remote_write
- Add Loki datasource to Grafana
## Test plan
- [ ] Run `mise run provision-indri -- --check --diff` to verify changes
- [ ] Apply with `mise run provision-indri`
- [ ] Verify services: `mise run indri-services-check`
- [ ] Check Grafana Explore with Loki datasource
- [ ] Query logs: `{service="grafana"}`
- [ ] Verify metrics still flowing to Prometheus dashboards
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Reviewed-on: https://forge.tail8d86e.ts.net/eblume/blumeops/pulls/11
This commit is contained in:
parent
070f26dc6d
commit
242c1880de
17 changed files with 799 additions and 10 deletions
|
|
@ -2,8 +2,10 @@
|
||||||
- name: Configure indri
|
- name: Configure indri
|
||||||
hosts: indri
|
hosts: indri
|
||||||
roles:
|
roles:
|
||||||
- role: node_exporter
|
- role: loki
|
||||||
tags: node_exporter
|
tags: loki
|
||||||
|
- role: alloy
|
||||||
|
tags: alloy
|
||||||
- role: prometheus
|
- role: prometheus
|
||||||
tags: prometheus
|
tags: prometheus
|
||||||
- role: grafana
|
- role: grafana
|
||||||
|
|
|
||||||
65
ansible/roles/alloy/defaults/main.yml
Normal file
65
ansible/roles/alloy/defaults/main.yml
Normal file
|
|
@ -0,0 +1,65 @@
|
||||||
|
---
|
||||||
|
# Grafana Alloy configuration
|
||||||
|
|
||||||
|
# Textfile collector directory (same as node_exporter for compatibility)
|
||||||
|
alloy_textfile_dir: /opt/homebrew/var/node_exporter/textfile
|
||||||
|
|
||||||
|
# Prometheus remote write endpoint
|
||||||
|
alloy_prometheus_url: "http://localhost:9090/api/v1/write"
|
||||||
|
|
||||||
|
# Loki endpoint (used in Phase 2)
|
||||||
|
alloy_loki_url: "http://localhost:3100/loki/api/v1/push"
|
||||||
|
|
||||||
|
# Instance label for metrics
|
||||||
|
alloy_instance_label: indri
|
||||||
|
|
||||||
|
# Scrape interval
|
||||||
|
alloy_scrape_interval: "15s"
|
||||||
|
|
||||||
|
# Config paths
|
||||||
|
alloy_config_dir: /opt/homebrew/etc/grafana-alloy
|
||||||
|
alloy_data_dir: /opt/homebrew/var/lib/grafana-alloy/data
|
||||||
|
|
||||||
|
# Log paths to collect
|
||||||
|
alloy_brew_logs:
|
||||||
|
- path: /opt/homebrew/var/log/grafana-stdout.log
|
||||||
|
service: grafana
|
||||||
|
stream: stdout
|
||||||
|
- path: /opt/homebrew/var/log/grafana-stderr.log
|
||||||
|
service: grafana
|
||||||
|
stream: stderr
|
||||||
|
- path: /opt/homebrew/var/log/forgejo.log
|
||||||
|
service: forgejo
|
||||||
|
stream: stdout
|
||||||
|
- path: /opt/homebrew/var/log/prometheus.err.log
|
||||||
|
service: prometheus
|
||||||
|
stream: stderr
|
||||||
|
- path: /opt/homebrew/var/log/tailscaled.log
|
||||||
|
service: tailscale
|
||||||
|
stream: stdout
|
||||||
|
- path: /opt/homebrew/var/transmission/transmission-daemon.log
|
||||||
|
service: transmission
|
||||||
|
stream: stdout
|
||||||
|
|
||||||
|
alloy_mcquack_logs:
|
||||||
|
- path: /Users/erichblume/Library/Logs/mcquack.devpi.out.log
|
||||||
|
service: devpi
|
||||||
|
stream: stdout
|
||||||
|
- path: /Users/erichblume/Library/Logs/mcquack.devpi.err.log
|
||||||
|
service: devpi
|
||||||
|
stream: stderr
|
||||||
|
- path: /Users/erichblume/Library/Logs/mcquack.kiwix-serve.out.log
|
||||||
|
service: kiwix
|
||||||
|
stream: stdout
|
||||||
|
- path: /Users/erichblume/Library/Logs/mcquack.kiwix-serve.err.log
|
||||||
|
service: kiwix
|
||||||
|
stream: stderr
|
||||||
|
- path: /Users/erichblume/Library/Logs/mcquack.borgmatic.out.log
|
||||||
|
service: borgmatic
|
||||||
|
stream: stdout
|
||||||
|
- path: /Users/erichblume/Library/Logs/mcquack.borgmatic.err.log
|
||||||
|
service: borgmatic
|
||||||
|
stream: stderr
|
||||||
|
|
||||||
|
# Enable log collection (requires Loki to be running)
|
||||||
|
alloy_collect_logs: true
|
||||||
3
ansible/roles/alloy/handlers/main.yml
Normal file
3
ansible/roles/alloy/handlers/main.yml
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
---
|
||||||
|
- name: restart alloy
|
||||||
|
ansible.builtin.command: brew services restart grafana-alloy
|
||||||
4
ansible/roles/alloy/meta/main.yml
Normal file
4
ansible/roles/alloy/meta/main.yml
Normal file
|
|
@ -0,0 +1,4 @@
|
||||||
|
---
|
||||||
|
dependencies:
|
||||||
|
- role: prometheus
|
||||||
|
- role: loki
|
||||||
45
ansible/roles/alloy/tasks/main.yml
Normal file
45
ansible/roles/alloy/tasks/main.yml
Normal file
|
|
@ -0,0 +1,45 @@
|
||||||
|
---
|
||||||
|
# Grafana Alloy installation and configuration
|
||||||
|
# Replaces node_exporter for metrics, adds log collection
|
||||||
|
|
||||||
|
- name: Install grafana-alloy via homebrew
|
||||||
|
community.general.homebrew:
|
||||||
|
name: grafana-alloy
|
||||||
|
state: present
|
||||||
|
|
||||||
|
- name: Ensure alloy config directory exists
|
||||||
|
ansible.builtin.file:
|
||||||
|
path: "{{ alloy_config_dir }}"
|
||||||
|
state: directory
|
||||||
|
mode: '0755'
|
||||||
|
|
||||||
|
- name: Ensure alloy data directory exists
|
||||||
|
ansible.builtin.file:
|
||||||
|
path: "{{ alloy_data_dir }}"
|
||||||
|
state: directory
|
||||||
|
mode: '0755'
|
||||||
|
|
||||||
|
- name: Ensure textfile collector directory exists
|
||||||
|
ansible.builtin.file:
|
||||||
|
path: "{{ alloy_textfile_dir }}"
|
||||||
|
state: directory
|
||||||
|
mode: '0755'
|
||||||
|
|
||||||
|
- name: Deploy alloy configuration
|
||||||
|
ansible.builtin.template:
|
||||||
|
src: config.alloy.j2
|
||||||
|
dest: "{{ alloy_config_dir }}/config.alloy"
|
||||||
|
mode: '0644'
|
||||||
|
notify: restart alloy
|
||||||
|
|
||||||
|
- name: Stop node_exporter service (replaced by alloy)
|
||||||
|
ansible.builtin.command: brew services stop node_exporter
|
||||||
|
register: node_exporter_stop
|
||||||
|
changed_when: "'Stopping' in node_exporter_stop.stdout or 'Successfully stopped' in node_exporter_stop.stdout"
|
||||||
|
failed_when: false
|
||||||
|
|
||||||
|
- name: Ensure alloy service is started
|
||||||
|
ansible.builtin.command: brew services start grafana-alloy
|
||||||
|
register: brew_start
|
||||||
|
changed_when: "'Successfully started' in brew_start.stdout"
|
||||||
|
failed_when: false
|
||||||
87
ansible/roles/alloy/templates/config.alloy.j2
Normal file
87
ansible/roles/alloy/templates/config.alloy.j2
Normal file
|
|
@ -0,0 +1,87 @@
|
||||||
|
// {{ ansible_managed }}
|
||||||
|
// Grafana Alloy configuration for {{ alloy_instance_label }}
|
||||||
|
// Collects system metrics (replacing node_exporter) and logs
|
||||||
|
|
||||||
|
// ============== METRICS COLLECTION ==============
|
||||||
|
|
||||||
|
// System metrics exporter (replaces node_exporter)
|
||||||
|
prometheus.exporter.unix "system" {
|
||||||
|
textfile {
|
||||||
|
directory = "{{ alloy_textfile_dir }}"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scrape system metrics
|
||||||
|
prometheus.scrape "system" {
|
||||||
|
targets = prometheus.exporter.unix.system.targets
|
||||||
|
forward_to = [prometheus.relabel.instance.receiver]
|
||||||
|
scrape_interval = "{{ alloy_scrape_interval }}"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add instance label to match existing setup
|
||||||
|
prometheus.relabel "instance" {
|
||||||
|
forward_to = [prometheus.remote_write.prometheus.receiver]
|
||||||
|
|
||||||
|
rule {
|
||||||
|
target_label = "instance"
|
||||||
|
replacement = "{{ alloy_instance_label }}"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Push metrics to Prometheus via remote_write
|
||||||
|
prometheus.remote_write "prometheus" {
|
||||||
|
endpoint {
|
||||||
|
url = "{{ alloy_prometheus_url }}"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
{% if alloy_collect_logs %}
|
||||||
|
// ============== LOG COLLECTION ==============
|
||||||
|
|
||||||
|
// Discover log files - brew services
|
||||||
|
local.file_match "brew_logs" {
|
||||||
|
path_targets = [
|
||||||
|
{% for log in alloy_brew_logs %}
|
||||||
|
{__path__ = "{{ log.path }}", service = "{{ log.service }}", stream = "{{ log.stream }}"},
|
||||||
|
{% endfor %}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Discover log files - mcquack LaunchAgents
|
||||||
|
local.file_match "mcquack_logs" {
|
||||||
|
path_targets = [
|
||||||
|
{% for log in alloy_mcquack_logs %}
|
||||||
|
{__path__ = "{{ log.path }}", service = "{{ log.service }}", stream = "{{ log.stream }}"},
|
||||||
|
{% endfor %}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read and forward brew service logs
|
||||||
|
loki.source.file "brew_logs" {
|
||||||
|
targets = local.file_match.brew_logs.targets
|
||||||
|
forward_to = [loki.relabel.add_host.receiver]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read and forward mcquack service logs
|
||||||
|
loki.source.file "mcquack_logs" {
|
||||||
|
targets = local.file_match.mcquack_logs.targets
|
||||||
|
forward_to = [loki.relabel.add_host.receiver]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add host label to all logs
|
||||||
|
loki.relabel "add_host" {
|
||||||
|
forward_to = [loki.write.loki.receiver]
|
||||||
|
|
||||||
|
rule {
|
||||||
|
target_label = "host"
|
||||||
|
replacement = "{{ alloy_instance_label }}"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write logs to Loki
|
||||||
|
loki.write "loki" {
|
||||||
|
endpoint {
|
||||||
|
url = "{{ alloy_loki_url }}"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
{% endif %}
|
||||||
460
ansible/roles/grafana/files/dashboards/loki.json
Normal file
460
ansible/roles/grafana/files/dashboards/loki.json
Normal file
|
|
@ -0,0 +1,460 @@
|
||||||
|
{
|
||||||
|
"annotations": {
|
||||||
|
"list": []
|
||||||
|
},
|
||||||
|
"editable": true,
|
||||||
|
"fiscalYearStartMonth": 0,
|
||||||
|
"graphTooltip": 0,
|
||||||
|
"id": null,
|
||||||
|
"links": [],
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "prometheus"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "thresholds"
|
||||||
|
},
|
||||||
|
"mappings": [],
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{ "color": "green", "value": null },
|
||||||
|
{ "color": "yellow", "value": 1073741824 },
|
||||||
|
{ "color": "red", "value": 5368709120 }
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"unit": "bytes"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 4, "w": 6, "x": 0, "y": 0 },
|
||||||
|
"id": 1,
|
||||||
|
"options": {
|
||||||
|
"colorMode": "value",
|
||||||
|
"graphMode": "none",
|
||||||
|
"justifyMode": "auto",
|
||||||
|
"orientation": "auto",
|
||||||
|
"reduceOptions": {
|
||||||
|
"calcs": ["lastNotNull"],
|
||||||
|
"fields": "",
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"textMode": "auto"
|
||||||
|
},
|
||||||
|
"pluginVersion": "10.0.0",
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||||
|
"expr": "sum(loki_ingester_memory_chunks_bytes)",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Chunks in Memory",
|
||||||
|
"type": "stat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "prometheus"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "thresholds"
|
||||||
|
},
|
||||||
|
"mappings": [],
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{ "color": "green", "value": null }
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"unit": "short"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 4, "w": 6, "x": 6, "y": 0 },
|
||||||
|
"id": 2,
|
||||||
|
"options": {
|
||||||
|
"colorMode": "value",
|
||||||
|
"graphMode": "none",
|
||||||
|
"justifyMode": "auto",
|
||||||
|
"orientation": "auto",
|
||||||
|
"reduceOptions": {
|
||||||
|
"calcs": ["lastNotNull"],
|
||||||
|
"fields": "",
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"textMode": "auto"
|
||||||
|
},
|
||||||
|
"pluginVersion": "10.0.0",
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||||
|
"expr": "sum(loki_ingester_memory_chunks)",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Active Chunks",
|
||||||
|
"type": "stat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "prometheus"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "thresholds"
|
||||||
|
},
|
||||||
|
"mappings": [],
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{ "color": "green", "value": null }
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"unit": "short"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 4, "w": 6, "x": 12, "y": 0 },
|
||||||
|
"id": 3,
|
||||||
|
"options": {
|
||||||
|
"colorMode": "value",
|
||||||
|
"graphMode": "none",
|
||||||
|
"justifyMode": "auto",
|
||||||
|
"orientation": "auto",
|
||||||
|
"reduceOptions": {
|
||||||
|
"calcs": ["lastNotNull"],
|
||||||
|
"fields": "",
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"textMode": "auto"
|
||||||
|
},
|
||||||
|
"pluginVersion": "10.0.0",
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||||
|
"expr": "sum(loki_ingester_streams_created_total) - sum(loki_ingester_streams_removed_total)",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Active Streams",
|
||||||
|
"type": "stat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "prometheus"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "thresholds"
|
||||||
|
},
|
||||||
|
"mappings": [],
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{ "color": "green", "value": null },
|
||||||
|
{ "color": "yellow", "value": 0.5 },
|
||||||
|
{ "color": "red", "value": 0.9 }
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"unit": "percentunit"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 4, "w": 6, "x": 18, "y": 0 },
|
||||||
|
"id": 4,
|
||||||
|
"options": {
|
||||||
|
"colorMode": "value",
|
||||||
|
"graphMode": "none",
|
||||||
|
"justifyMode": "auto",
|
||||||
|
"orientation": "auto",
|
||||||
|
"reduceOptions": {
|
||||||
|
"calcs": ["lastNotNull"],
|
||||||
|
"fields": "",
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"textMode": "auto"
|
||||||
|
},
|
||||||
|
"pluginVersion": "10.0.0",
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||||
|
"expr": "1 - (go_memstats_heap_idle_bytes{job=\"loki\"} / go_memstats_heap_sys_bytes{job=\"loki\"})",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Heap Usage",
|
||||||
|
"type": "stat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "prometheus"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"axisBorderShow": false,
|
||||||
|
"axisCenteredZero": false,
|
||||||
|
"axisColorMode": "text",
|
||||||
|
"axisLabel": "",
|
||||||
|
"axisPlacement": "auto",
|
||||||
|
"barAlignment": 0,
|
||||||
|
"drawStyle": "line",
|
||||||
|
"fillOpacity": 10,
|
||||||
|
"gradientMode": "none",
|
||||||
|
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
|
||||||
|
"insertNulls": false,
|
||||||
|
"lineInterpolation": "linear",
|
||||||
|
"lineWidth": 1,
|
||||||
|
"pointSize": 5,
|
||||||
|
"scaleDistribution": { "type": "linear" },
|
||||||
|
"showPoints": "never",
|
||||||
|
"spanNulls": false,
|
||||||
|
"stacking": { "group": "A", "mode": "none" },
|
||||||
|
"thresholdsStyle": { "mode": "off" }
|
||||||
|
},
|
||||||
|
"mappings": [],
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [{ "color": "green", "value": null }]
|
||||||
|
},
|
||||||
|
"unit": "Bps"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 4 },
|
||||||
|
"id": 5,
|
||||||
|
"options": {
|
||||||
|
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||||
|
"tooltip": { "mode": "multi", "sort": "desc" }
|
||||||
|
},
|
||||||
|
"pluginVersion": "10.0.0",
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||||
|
"expr": "rate(loki_distributor_bytes_received_total[5m])",
|
||||||
|
"legendFormat": "Bytes Received",
|
||||||
|
"refId": "A"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||||
|
"expr": "rate(loki_ingester_chunk_stored_bytes_total[5m])",
|
||||||
|
"legendFormat": "Bytes Stored",
|
||||||
|
"refId": "B"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Ingestion Rate",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "prometheus"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"axisBorderShow": false,
|
||||||
|
"axisCenteredZero": false,
|
||||||
|
"axisColorMode": "text",
|
||||||
|
"axisLabel": "",
|
||||||
|
"axisPlacement": "auto",
|
||||||
|
"barAlignment": 0,
|
||||||
|
"drawStyle": "line",
|
||||||
|
"fillOpacity": 10,
|
||||||
|
"gradientMode": "none",
|
||||||
|
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
|
||||||
|
"insertNulls": false,
|
||||||
|
"lineInterpolation": "linear",
|
||||||
|
"lineWidth": 1,
|
||||||
|
"pointSize": 5,
|
||||||
|
"scaleDistribution": { "type": "linear" },
|
||||||
|
"showPoints": "never",
|
||||||
|
"spanNulls": false,
|
||||||
|
"stacking": { "group": "A", "mode": "none" },
|
||||||
|
"thresholdsStyle": { "mode": "off" }
|
||||||
|
},
|
||||||
|
"mappings": [],
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [{ "color": "green", "value": null }]
|
||||||
|
},
|
||||||
|
"unit": "short"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 4 },
|
||||||
|
"id": 6,
|
||||||
|
"options": {
|
||||||
|
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||||
|
"tooltip": { "mode": "multi", "sort": "desc" }
|
||||||
|
},
|
||||||
|
"pluginVersion": "10.0.0",
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||||
|
"expr": "rate(loki_distributor_lines_received_total[5m])",
|
||||||
|
"legendFormat": "Lines/sec",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Log Lines Rate",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "prometheus"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"axisBorderShow": false,
|
||||||
|
"axisCenteredZero": false,
|
||||||
|
"axisColorMode": "text",
|
||||||
|
"axisLabel": "",
|
||||||
|
"axisPlacement": "auto",
|
||||||
|
"barAlignment": 0,
|
||||||
|
"drawStyle": "line",
|
||||||
|
"fillOpacity": 10,
|
||||||
|
"gradientMode": "none",
|
||||||
|
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
|
||||||
|
"insertNulls": false,
|
||||||
|
"lineInterpolation": "linear",
|
||||||
|
"lineWidth": 1,
|
||||||
|
"pointSize": 5,
|
||||||
|
"scaleDistribution": { "type": "linear" },
|
||||||
|
"showPoints": "never",
|
||||||
|
"spanNulls": false,
|
||||||
|
"stacking": { "group": "A", "mode": "none" },
|
||||||
|
"thresholdsStyle": { "mode": "off" }
|
||||||
|
},
|
||||||
|
"mappings": [],
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [{ "color": "green", "value": null }]
|
||||||
|
},
|
||||||
|
"unit": "bytes"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 12 },
|
||||||
|
"id": 7,
|
||||||
|
"options": {
|
||||||
|
"legend": { "calcs": ["lastNotNull"], "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||||
|
"tooltip": { "mode": "multi", "sort": "desc" }
|
||||||
|
},
|
||||||
|
"pluginVersion": "10.0.0",
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||||
|
"expr": "sum(loki_ingester_memory_chunks_bytes)",
|
||||||
|
"legendFormat": "Chunks in Memory",
|
||||||
|
"refId": "A"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||||
|
"expr": "go_memstats_heap_inuse_bytes{job=\"loki\"}",
|
||||||
|
"legendFormat": "Heap In Use",
|
||||||
|
"refId": "B"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Memory Usage Over Time",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "prometheus"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"axisBorderShow": false,
|
||||||
|
"axisCenteredZero": false,
|
||||||
|
"axisColorMode": "text",
|
||||||
|
"axisLabel": "",
|
||||||
|
"axisPlacement": "auto",
|
||||||
|
"barAlignment": 0,
|
||||||
|
"drawStyle": "line",
|
||||||
|
"fillOpacity": 10,
|
||||||
|
"gradientMode": "none",
|
||||||
|
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
|
||||||
|
"insertNulls": false,
|
||||||
|
"lineInterpolation": "linear",
|
||||||
|
"lineWidth": 1,
|
||||||
|
"pointSize": 5,
|
||||||
|
"scaleDistribution": { "type": "linear" },
|
||||||
|
"showPoints": "never",
|
||||||
|
"spanNulls": false,
|
||||||
|
"stacking": { "group": "A", "mode": "none" },
|
||||||
|
"thresholdsStyle": { "mode": "off" }
|
||||||
|
},
|
||||||
|
"mappings": [],
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [{ "color": "green", "value": null }]
|
||||||
|
},
|
||||||
|
"unit": "short"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 12 },
|
||||||
|
"id": 8,
|
||||||
|
"options": {
|
||||||
|
"legend": { "calcs": ["sum"], "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||||
|
"tooltip": { "mode": "multi", "sort": "desc" }
|
||||||
|
},
|
||||||
|
"pluginVersion": "10.0.0",
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||||
|
"expr": "increase(loki_ingester_chunk_stored_bytes_total[24h])",
|
||||||
|
"legendFormat": "Bytes Stored (24h)",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Storage Growth (24h rolling)",
|
||||||
|
"type": "timeseries"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"refresh": "1m",
|
||||||
|
"schemaVersion": 38,
|
||||||
|
"tags": ["loki", "logs"],
|
||||||
|
"templating": {
|
||||||
|
"list": []
|
||||||
|
},
|
||||||
|
"time": {
|
||||||
|
"from": "now-24h",
|
||||||
|
"to": "now"
|
||||||
|
},
|
||||||
|
"timepicker": {},
|
||||||
|
"timezone": "",
|
||||||
|
"title": "Loki",
|
||||||
|
"uid": "loki-homelab",
|
||||||
|
"version": 1,
|
||||||
|
"weekStart": ""
|
||||||
|
}
|
||||||
4
ansible/roles/grafana/meta/main.yml
Normal file
4
ansible/roles/grafana/meta/main.yml
Normal file
|
|
@ -0,0 +1,4 @@
|
||||||
|
---
|
||||||
|
dependencies:
|
||||||
|
- role: prometheus
|
||||||
|
- role: loki
|
||||||
|
|
@ -10,3 +10,11 @@ datasources:
|
||||||
url: http://localhost:9090
|
url: http://localhost:9090
|
||||||
isDefault: true
|
isDefault: true
|
||||||
editable: false
|
editable: false
|
||||||
|
|
||||||
|
- name: Loki
|
||||||
|
type: loki
|
||||||
|
access: proxy
|
||||||
|
orgId: 1
|
||||||
|
uid: loki
|
||||||
|
url: http://localhost:3100
|
||||||
|
editable: false
|
||||||
|
|
|
||||||
12
ansible/roles/loki/defaults/main.yml
Normal file
12
ansible/roles/loki/defaults/main.yml
Normal file
|
|
@ -0,0 +1,12 @@
|
||||||
|
---
|
||||||
|
# Loki configuration
|
||||||
|
|
||||||
|
# Server settings
|
||||||
|
loki_http_port: 3100
|
||||||
|
|
||||||
|
# Storage paths
|
||||||
|
loki_data_dir: /opt/homebrew/var/loki
|
||||||
|
loki_config_file: /opt/homebrew/etc/loki-local-config.yaml
|
||||||
|
|
||||||
|
# Retention settings
|
||||||
|
loki_retention_period: 744h # 31 days
|
||||||
3
ansible/roles/loki/handlers/main.yml
Normal file
3
ansible/roles/loki/handlers/main.yml
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
---
|
||||||
|
- name: restart loki
|
||||||
|
ansible.builtin.command: brew services restart loki
|
||||||
2
ansible/roles/loki/meta/main.yml
Normal file
2
ansible/roles/loki/meta/main.yml
Normal file
|
|
@ -0,0 +1,2 @@
|
||||||
|
---
|
||||||
|
dependencies: []
|
||||||
38
ansible/roles/loki/tasks/main.yml
Normal file
38
ansible/roles/loki/tasks/main.yml
Normal file
|
|
@ -0,0 +1,38 @@
|
||||||
|
---
|
||||||
|
# Loki installation and configuration
|
||||||
|
|
||||||
|
- name: Install loki via homebrew
|
||||||
|
community.general.homebrew:
|
||||||
|
name: loki
|
||||||
|
state: present
|
||||||
|
|
||||||
|
- name: Ensure loki data directory exists
|
||||||
|
ansible.builtin.file:
|
||||||
|
path: "{{ loki_data_dir }}"
|
||||||
|
state: directory
|
||||||
|
mode: '0755'
|
||||||
|
|
||||||
|
- name: Ensure loki chunks directory exists
|
||||||
|
ansible.builtin.file:
|
||||||
|
path: "{{ loki_data_dir }}/chunks"
|
||||||
|
state: directory
|
||||||
|
mode: '0755'
|
||||||
|
|
||||||
|
- name: Ensure loki rules directory exists
|
||||||
|
ansible.builtin.file:
|
||||||
|
path: "{{ loki_data_dir }}/rules"
|
||||||
|
state: directory
|
||||||
|
mode: '0755'
|
||||||
|
|
||||||
|
- name: Deploy loki configuration
|
||||||
|
ansible.builtin.template:
|
||||||
|
src: loki-config.yaml.j2
|
||||||
|
dest: "{{ loki_config_file }}"
|
||||||
|
mode: '0644'
|
||||||
|
notify: restart loki
|
||||||
|
|
||||||
|
- name: Ensure loki service is started
|
||||||
|
ansible.builtin.command: brew services start loki
|
||||||
|
register: brew_start
|
||||||
|
changed_when: "'Successfully started' in brew_start.stdout"
|
||||||
|
failed_when: false
|
||||||
53
ansible/roles/loki/templates/loki-config.yaml.j2
Normal file
53
ansible/roles/loki/templates/loki-config.yaml.j2
Normal file
|
|
@ -0,0 +1,53 @@
|
||||||
|
# {{ ansible_managed }}
|
||||||
|
# Loki configuration for single-node deployment
|
||||||
|
|
||||||
|
auth_enabled: false
|
||||||
|
|
||||||
|
server:
|
||||||
|
http_listen_port: {{ loki_http_port }}
|
||||||
|
grpc_listen_port: 9096
|
||||||
|
|
||||||
|
common:
|
||||||
|
instance_addr: 127.0.0.1
|
||||||
|
path_prefix: {{ loki_data_dir }}
|
||||||
|
storage:
|
||||||
|
filesystem:
|
||||||
|
chunks_directory: {{ loki_data_dir }}/chunks
|
||||||
|
rules_directory: {{ loki_data_dir }}/rules
|
||||||
|
replication_factor: 1
|
||||||
|
ring:
|
||||||
|
kvstore:
|
||||||
|
store: inmemory
|
||||||
|
|
||||||
|
query_range:
|
||||||
|
results_cache:
|
||||||
|
cache:
|
||||||
|
embedded_cache:
|
||||||
|
enabled: true
|
||||||
|
max_size_mb: 100
|
||||||
|
|
||||||
|
schema_config:
|
||||||
|
configs:
|
||||||
|
- from: 2024-01-01
|
||||||
|
store: tsdb
|
||||||
|
object_store: filesystem
|
||||||
|
schema: v13
|
||||||
|
index:
|
||||||
|
prefix: index_
|
||||||
|
period: 24h
|
||||||
|
|
||||||
|
storage_config:
|
||||||
|
tsdb_shipper:
|
||||||
|
active_index_directory: {{ loki_data_dir }}/tsdb-index
|
||||||
|
cache_location: {{ loki_data_dir }}/tsdb-cache
|
||||||
|
|
||||||
|
limits_config:
|
||||||
|
retention_period: {{ loki_retention_period }}
|
||||||
|
|
||||||
|
compactor:
|
||||||
|
working_directory: {{ loki_data_dir }}/compactor
|
||||||
|
compaction_interval: 10m
|
||||||
|
retention_enabled: true
|
||||||
|
retention_delete_delay: 2h
|
||||||
|
retention_delete_worker_count: 150
|
||||||
|
delete_request_store: filesystem
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
--config.file /opt/homebrew/etc/prometheus.yml
|
--config.file /opt/homebrew/etc/prometheus.yml
|
||||||
--web.listen-address=0.0.0.0:9090
|
--web.listen-address=0.0.0.0:9090
|
||||||
--storage.tsdb.path /opt/homebrew/var/prometheus
|
--storage.tsdb.path /opt/homebrew/var/prometheus
|
||||||
|
--web.enable-remote-write-receiver
|
||||||
|
|
|
||||||
|
|
@ -2,14 +2,14 @@
|
||||||
global:
|
global:
|
||||||
scrape_interval: 15s
|
scrape_interval: 15s
|
||||||
|
|
||||||
|
# Note: indri system metrics are pushed via Alloy remote_write
|
||||||
|
# Sifaka still uses traditional scraping via node_exporter
|
||||||
|
|
||||||
scrape_configs:
|
scrape_configs:
|
||||||
- job_name: "node-exporter-indri"
|
|
||||||
static_configs:
|
|
||||||
- targets: ["localhost:9090"]
|
|
||||||
- targets: ["localhost:9100"]
|
|
||||||
relabel_configs:
|
|
||||||
- target_label: instance
|
|
||||||
replacement: indri
|
|
||||||
- job_name: "node-exporter-sifaka"
|
- job_name: "node-exporter-sifaka"
|
||||||
static_configs:
|
static_configs:
|
||||||
- targets: ["sifaka:9100"]
|
- targets: ["sifaka:9100"]
|
||||||
|
|
||||||
|
- job_name: "loki"
|
||||||
|
static_configs:
|
||||||
|
- targets: ["localhost:3100"]
|
||||||
|
|
|
||||||
|
|
@ -43,9 +43,10 @@ echo ""
|
||||||
|
|
||||||
# Check via SSH that services are running on indri
|
# Check via SSH that services are running on indri
|
||||||
echo "Local services (via launchctl/brew services):"
|
echo "Local services (via launchctl/brew services):"
|
||||||
|
check_service "loki" "ssh indri 'brew services list | grep loki | grep started'"
|
||||||
|
check_service "alloy" "ssh indri 'brew services list | grep grafana-alloy | grep started'"
|
||||||
check_service "prometheus" "ssh indri 'brew services list | grep prometheus | grep started'"
|
check_service "prometheus" "ssh indri 'brew services list | grep prometheus | grep started'"
|
||||||
check_service "grafana" "ssh indri 'brew services list | grep grafana | grep started'"
|
check_service "grafana" "ssh indri 'brew services list | grep grafana | grep started'"
|
||||||
check_service "node_exporter" "ssh indri 'brew services list | grep node_exporter | grep started'"
|
|
||||||
check_service "transmission" "ssh indri 'brew services list | grep transmission | grep started'"
|
check_service "transmission" "ssh indri 'brew services list | grep transmission | grep started'"
|
||||||
check_service "transmission-metrics" "ssh indri 'launchctl list | grep transmission-metrics | grep -v \"^-\"'"
|
check_service "transmission-metrics" "ssh indri 'launchctl list | grep transmission-metrics | grep -v \"^-\"'"
|
||||||
check_service "kiwix-serve" "ssh indri 'launchctl list | grep kiwix | grep -v \"^-\"'"
|
check_service "kiwix-serve" "ssh indri 'launchctl list | grep kiwix | grep -v \"^-\"'"
|
||||||
|
|
@ -54,6 +55,7 @@ check_service "devpi" "ssh indri 'launchctl list | grep devpi | grep -v \"^-\"'"
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "HTTP endpoints (via Tailscale):"
|
echo "HTTP endpoints (via Tailscale):"
|
||||||
|
check_http "Loki" "http://indri:3100/ready"
|
||||||
check_http "Prometheus" "http://indri:9090/-/healthy"
|
check_http "Prometheus" "http://indri:9090/-/healthy"
|
||||||
check_http "Grafana" "http://indri:3000/api/health"
|
check_http "Grafana" "http://indri:3000/api/health"
|
||||||
check_http "Kiwix" "http://indri:5501/"
|
check_http "Kiwix" "http://indri:5501/"
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue