Add Grafana Alloy and Loki for unified observability

- Add ansible/roles/alloy/ - replaces node_exporter for metrics collection
  - Uses prometheus.exporter.unix with textfile collector
  - Pushes metrics to Prometheus via remote_write
  - Collects logs from brew services and mcquack LaunchAgents
  - Forwards logs to Loki

- Add ansible/roles/loki/ - log storage and query engine
  - Single-node filesystem-based deployment
  - TSDB storage with 31-day retention
  - Integrated with Grafana as datasource

- Update Prometheus to enable remote_write receiver
  - Remove node-exporter-indri scrape job (Alloy pushes instead)
  - Keep sifaka scraping via traditional node_exporter

- Update Grafana datasources to include Loki

- Update indri-services-check to verify Loki and Alloy

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Erich Blume 2026-01-15 11:51:50 -08:00
commit 09c432e0c1
13 changed files with 327 additions and 10 deletions

View file

@ -2,8 +2,12 @@
- name: Configure indri
hosts: indri
roles:
- role: node_exporter
tags: node_exporter
# Loki must come before Alloy (Alloy pushes logs to Loki)
- role: loki
tags: loki
# Alloy replaces node_exporter for metrics and adds log collection
- role: alloy
tags: alloy
- role: prometheus
tags: prometheus
- role: grafana

View file

@ -0,0 +1,65 @@
---
# Grafana Alloy role variables.
#
# Every path below is derived from one of the two base directories defined
# first, so a host with a different Homebrew prefix or user account only has
# to override those.

# Homebrew installation prefix.
alloy_homebrew_prefix: /opt/homebrew

# Directory that holds the mcquack LaunchAgent log files.
alloy_mcquack_log_dir: /Users/erichblume/Library/Logs

# Textfile collector directory (same as node_exporter for compatibility)
alloy_textfile_dir: "{{ alloy_homebrew_prefix }}/var/node_exporter/textfile"

# Prometheus remote write endpoint
alloy_prometheus_url: "http://localhost:9090/api/v1/write"

# Loki push endpoint (only rendered into the config when alloy_collect_logs
# is true)
alloy_loki_url: "http://localhost:3100/loki/api/v1/push"

# Value written to the `instance` label on metrics and the `host` label on logs
alloy_instance_label: indri

# Scrape interval
alloy_scrape_interval: "15s"

# Config paths
alloy_config_dir: "{{ alloy_homebrew_prefix }}/etc/grafana-alloy"
alloy_data_dir: "{{ alloy_homebrew_prefix }}/var/lib/grafana-alloy/data"

# Log files to collect. Each entry becomes one file target; `service` and
# `stream` are attached to the log lines as labels.
alloy_brew_logs:
  - path: "{{ alloy_homebrew_prefix }}/var/log/grafana-stdout.log"
    service: grafana
    stream: stdout
  - path: "{{ alloy_homebrew_prefix }}/var/log/grafana-stderr.log"
    service: grafana
    stream: stderr
  - path: "{{ alloy_homebrew_prefix }}/var/log/forgejo.log"
    service: forgejo
    stream: stdout
  - path: "{{ alloy_homebrew_prefix }}/var/log/prometheus.err.log"
    service: prometheus
    stream: stderr
  - path: "{{ alloy_homebrew_prefix }}/var/log/tailscaled.log"
    service: tailscale
    stream: stdout
  - path: "{{ alloy_homebrew_prefix }}/var/transmission/transmission-daemon.log"
    service: transmission
    stream: stdout

alloy_mcquack_logs:
  - path: "{{ alloy_mcquack_log_dir }}/mcquack.devpi.out.log"
    service: devpi
    stream: stdout
  - path: "{{ alloy_mcquack_log_dir }}/mcquack.devpi.err.log"
    service: devpi
    stream: stderr
  - path: "{{ alloy_mcquack_log_dir }}/mcquack.kiwix-serve.out.log"
    service: kiwix
    stream: stdout
  - path: "{{ alloy_mcquack_log_dir }}/mcquack.kiwix-serve.err.log"
    service: kiwix
    stream: stderr
  - path: "{{ alloy_mcquack_log_dir }}/mcquack.borgmatic.out.log"
    service: borgmatic
    stream: stdout
  - path: "{{ alloy_mcquack_log_dir }}/mcquack.borgmatic.err.log"
    service: borgmatic
    stream: stderr

# Enable log collection (requires Loki to be running)
alloy_collect_logs: true

View file

@ -0,0 +1,3 @@
---
# Handlers for the alloy role.

# Restarts the Homebrew-managed grafana-alloy service. Tasks trigger it with
# `notify: restart alloy`.
- name: restart alloy
  ansible.builtin.command:
    argv:
      - brew
      - services
      - restart
      - grafana-alloy

View file

@ -0,0 +1,45 @@
---
# Grafana Alloy installation and configuration.
# Alloy replaces node_exporter for metrics and adds log collection.

- name: Install grafana-alloy via homebrew
  community.general.homebrew:
    name: grafana-alloy
    state: present

# Config, data and textfile-collector directories all get the same treatment,
# so they are created by a single looped task.
- name: Ensure alloy directories exist
  ansible.builtin.file:
    path: "{{ item }}"
    state: directory
    mode: "0755"
  loop:
    - "{{ alloy_config_dir }}"
    - "{{ alloy_data_dir }}"
    - "{{ alloy_textfile_dir }}"

- name: Deploy alloy configuration
  ansible.builtin.template:
    src: config.alloy.j2
    dest: "{{ alloy_config_dir }}/config.alloy"
    mode: "0644"
  notify: restart alloy

# Best-effort by design: a non-zero exit is ignored here (presumably to cover
# hosts where node_exporter is already stopped or was never installed).
# The task only reports "changed" when brew says it actually stopped something.
- name: Stop node_exporter service (replaced by alloy)
  ansible.builtin.command: brew services stop node_exporter
  register: node_exporter_stop
  changed_when: >-
    'Stopping' in node_exporter_stop.stdout
    or 'Successfully stopped' in node_exporter_stop.stdout
  failed_when: false

# Unlike the stop above, a failed start must not be swallowed: the play would
# report success while nothing is collecting metrics or logs. The only
# tolerated non-zero outcome is brew reporting the service as already started.
- name: Ensure alloy service is started
  ansible.builtin.command: brew services start grafana-alloy
  register: brew_start
  changed_when: "'Successfully started' in brew_start.stdout"
  failed_when:
    - brew_start.rc != 0
    - "'already started' not in (brew_start.stdout ~ brew_start.stderr)"

View file

@ -0,0 +1,87 @@
// {{ ansible_managed }}
// Grafana Alloy pipeline for {{ alloy_instance_label }}.
// Part 1 exports and pushes system metrics (the job node_exporter used to do).
// Part 2, when enabled, tails log files and ships them to Loki.

// ---------------------------------------------------------------
// Part 1: metrics
//   exporter.unix -> scrape -> relabel(instance) -> remote_write
// ---------------------------------------------------------------

// Built-in unix exporter, including the textfile collector directory.
prometheus.exporter.unix "system" {
  textfile {
    directory = "{{ alloy_textfile_dir }}"
  }
}

// Scrape the exporter above on a fixed interval.
prometheus.scrape "system" {
  targets         = prometheus.exporter.unix.system.targets
  scrape_interval = "{{ alloy_scrape_interval }}"
  forward_to      = [prometheus.relabel.instance.receiver]
}

// Overwrite the `instance` label with a fixed value for this host.
prometheus.relabel "instance" {
  forward_to = [prometheus.remote_write.prometheus.receiver]

  rule {
    action       = "replace"
    target_label = "instance"
    replacement  = "{{ alloy_instance_label }}"
  }
}

// Push the relabelled samples to Prometheus.
prometheus.remote_write "prometheus" {
  endpoint {
    url = "{{ alloy_prometheus_url }}"
  }
}
{% if alloy_collect_logs %}

// ---------------------------------------------------------------
// Part 2: logs
//   file_match -> source.file -> relabel(host) -> loki.write
// ---------------------------------------------------------------

// File targets for brew-managed services. `service` and `stream` travel with
// each target as labels.
local.file_match "brew_logs" {
  path_targets = [
{% for entry in alloy_brew_logs %}
    {__path__ = "{{ entry.path }}", service = "{{ entry.service }}", stream = "{{ entry.stream }}"},
{% endfor %}
  ]
}

// File targets for the mcquack LaunchAgents.
local.file_match "mcquack_logs" {
  path_targets = [
{% for entry in alloy_mcquack_logs %}
    {__path__ = "{{ entry.path }}", service = "{{ entry.service }}", stream = "{{ entry.stream }}"},
{% endfor %}
  ]
}

// Tail the brew service log files.
loki.source.file "brew_logs" {
  targets    = local.file_match.brew_logs.targets
  forward_to = [loki.relabel.add_host.receiver]
}

// Tail the mcquack log files.
loki.source.file "mcquack_logs" {
  targets    = local.file_match.mcquack_logs.targets
  forward_to = [loki.relabel.add_host.receiver]
}

// Both sources pass through here so every log line carries a `host` label.
loki.relabel "add_host" {
  forward_to = [loki.write.loki.receiver]

  rule {
    action       = "replace"
    target_label = "host"
    replacement  = "{{ alloy_instance_label }}"
  }
}

// Ship the labelled log lines to Loki.
loki.write "loki" {
  endpoint {
    url = "{{ alloy_loki_url }}"
  }
}
{% endif %}

View file

@ -10,3 +10,11 @@ datasources:
url: http://localhost:9090
isDefault: true
editable: false
- name: Loki
type: loki
access: proxy
orgId: 1
uid: loki
url: http://localhost:3100
editable: false

View file

@ -0,0 +1,12 @@
---
# Loki role variables.

# How long log data is kept before it is deleted (744h = 31 days).
loki_retention_period: "744h"

# On-disk locations: the rendered config file and the data root.
loki_config_file: /opt/homebrew/etc/loki-local-config.yaml
loki_data_dir: /opt/homebrew/var/loki

# HTTP listen port of the Loki server.
loki_http_port: 3100

View file

@ -0,0 +1,3 @@
---
# Handlers for the loki role.

# Restarts the Homebrew-managed loki service. Tasks trigger it with
# `notify: restart loki`.
- name: restart loki
  ansible.builtin.command:
    argv:
      - brew
      - services
      - restart
      - loki

View file

@ -0,0 +1,38 @@
---
# Loki installation and configuration.

- name: Install loki via homebrew
  community.general.homebrew:
    name: loki
    state: present

# The data root is listed first so it exists before its chunks/ and rules/
# children are created.
- name: Ensure loki data directories exist
  ansible.builtin.file:
    path: "{{ item }}"
    state: directory
    mode: "0755"
  loop:
    - "{{ loki_data_dir }}"
    - "{{ loki_data_dir }}/chunks"
    - "{{ loki_data_dir }}/rules"

- name: Deploy loki configuration
  ansible.builtin.template:
    src: loki-config.yaml.j2
    dest: "{{ loki_config_file }}"
    mode: "0644"
  notify: restart loki

# A failed start must surface as a task failure; otherwise the play reports
# success while Loki is down. The only tolerated non-zero outcome is brew
# reporting the service as already started.
- name: Ensure loki service is started
  ansible.builtin.command: brew services start loki
  register: brew_start
  changed_when: "'Successfully started' in brew_start.stdout"
  failed_when:
    - brew_start.rc != 0
    - "'already started' not in (brew_start.stdout ~ brew_start.stderr)"

View file

@ -0,0 +1,53 @@
# {{ ansible_managed }}
# Loki configuration: single node, filesystem storage, in-memory ring.

# No tenant authentication.
auth_enabled: false

# Listener ports.
server:
  http_listen_port: {{ loki_http_port }}
  grpc_listen_port: 9096

# Settings shared by all Loki components. Everything lives under the data
# directory; replication factor 1 with an in-memory ring key-value store.
common:
  instance_addr: 127.0.0.1
  path_prefix: "{{ loki_data_dir }}"
  storage:
    filesystem:
      chunks_directory: "{{ loki_data_dir }}/chunks"
      rules_directory: "{{ loki_data_dir }}/rules"
  replication_factor: 1
  ring:
    kvstore:
      store: inmemory

# Embedded results cache for range queries, capped at 100 MB.
query_range:
  results_cache:
    cache:
      embedded_cache:
        enabled: true
        max_size_mb: 100

# One schema period: TSDB index on the filesystem object store, schema v13,
# one index per 24h.
schema_config:
  configs:
    - from: 2024-01-01
      store: tsdb
      object_store: filesystem
      schema: v13
      index:
        prefix: index_
        period: 24h

# Working locations for the TSDB index shipper.
storage_config:
  tsdb_shipper:
    active_index_directory: "{{ loki_data_dir }}/tsdb-index"
    cache_location: "{{ loki_data_dir }}/tsdb-cache"

# Retention window; applied by the compactor configured below.
limits_config:
  retention_period: "{{ loki_retention_period }}"

# Compactor with retention enabled.
compactor:
  working_directory: "{{ loki_data_dir }}/compactor"
  compaction_interval: 10m
  retention_enabled: true
  retention_delete_delay: 2h
  retention_delete_worker_count: 150
  delete_request_store: filesystem

View file

@ -1,3 +1,4 @@
--config.file /opt/homebrew/etc/prometheus.yml
--web.listen-address=0.0.0.0:9090
--storage.tsdb.path /opt/homebrew/var/prometheus
--web.enable-remote-write-receiver

View file

@ -2,14 +2,10 @@
global:
scrape_interval: 15s
# Note: indri metrics are pushed via Alloy remote_write
# Sifaka still uses traditional scraping via node_exporter
scrape_configs:
- job_name: "node-exporter-indri"
static_configs:
- targets: ["localhost:9090"]
- targets: ["localhost:9100"]
relabel_configs:
- target_label: instance
replacement: indri
- job_name: "node-exporter-sifaka"
static_configs:
- targets: ["sifaka:9100"]

View file

@ -43,9 +43,10 @@ echo ""
# Check via SSH that services are running on indri
echo "Local services (via launchctl/brew services):"
# Each service name is anchored to the start of the `brew services list` row
# and must be followed by whitespace. A bare `grep grafana` would also match
# the `grafana-alloy` row and report Grafana as running when only Alloy is.
check_service "loki" "ssh indri 'brew services list | grep -E \"^loki[[:space:]]\" | grep started'"
check_service "alloy" "ssh indri 'brew services list | grep -E \"^grafana-alloy[[:space:]]\" | grep started'"
check_service "prometheus" "ssh indri 'brew services list | grep -E \"^prometheus[[:space:]]\" | grep started'"
check_service "grafana" "ssh indri 'brew services list | grep -E \"^grafana[[:space:]]\" | grep started'"
check_service "node_exporter" "ssh indri 'brew services list | grep node_exporter | grep started'"
check_service "transmission" "ssh indri 'brew services list | grep transmission | grep started'"
check_service "transmission-metrics" "ssh indri 'launchctl list | grep transmission-metrics | grep -v \"^-\"'"
check_service "kiwix-serve" "ssh indri 'launchctl list | grep kiwix | grep -v \"^-\"'"
@ -54,6 +55,7 @@ check_service "devpi" "ssh indri 'launchctl list | grep devpi | grep -v \"^-\"'"
echo ""
echo "HTTP endpoints (via Tailscale):"
check_http "Loki" "http://indri:3100/ready"
check_http "Prometheus" "http://indri:9090/-/healthy"
check_http "Grafana" "http://indri:3000/api/health"
check_http "Kiwix" "http://indri:5501/"