Add Grafana Alloy and Loki for unified observability
- Add ansible/roles/alloy/ - replaces node_exporter for metrics collection
  - Uses prometheus.exporter.unix with textfile collector
  - Pushes metrics to Prometheus via remote_write
  - Collects logs from brew services and mcquack LaunchAgents
  - Forwards logs to Loki
- Add ansible/roles/loki/ - log storage and query engine
  - Single-node filesystem-based deployment
  - TSDB storage with 31-day retention
  - Integrated with Grafana as datasource
- Update Prometheus to enable remote_write receiver
  - Remove node-exporter-indri scrape job (Alloy pushes instead)
  - Keep sifaka scraping via traditional node_exporter
- Update Grafana datasources to include Loki
- Update indri-services-check to verify Loki and Alloy

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
070f26dc6d
commit
09c432e0c1
13 changed files with 327 additions and 10 deletions
|
|
@ -2,8 +2,12 @@
|
|||
- name: Configure indri
|
||||
hosts: indri
|
||||
roles:
|
||||
- role: node_exporter
|
||||
tags: node_exporter
|
||||
# Loki must come before Alloy (Alloy pushes logs to Loki)
|
||||
- role: loki
|
||||
tags: loki
|
||||
# Alloy replaces node_exporter for metrics and adds log collection
|
||||
- role: alloy
|
||||
tags: alloy
|
||||
- role: prometheus
|
||||
tags: prometheus
|
||||
- role: grafana
|
||||
|
|
|
|||
65
ansible/roles/alloy/defaults/main.yml
Normal file
65
ansible/roles/alloy/defaults/main.yml
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
---
|
||||
# Grafana Alloy configuration
|
||||
|
||||
# Textfile collector directory (same as node_exporter for compatibility)
|
||||
alloy_textfile_dir: /opt/homebrew/var/node_exporter/textfile
|
||||
|
||||
# Prometheus remote write endpoint
|
||||
alloy_prometheus_url: "http://localhost:9090/api/v1/write"
|
||||
|
||||
# Loki push endpoint (only used when alloy_collect_logs is true)
|
||||
alloy_loki_url: "http://localhost:3100/loki/api/v1/push"
|
||||
|
||||
# Instance label for metrics
|
||||
alloy_instance_label: indri
|
||||
|
||||
# Scrape interval
|
||||
alloy_scrape_interval: "15s"
|
||||
|
||||
# Config paths
|
||||
alloy_config_dir: /opt/homebrew/etc/grafana-alloy
|
||||
alloy_data_dir: /opt/homebrew/var/lib/grafana-alloy/data
|
||||
|
||||
# Log paths to collect
|
||||
alloy_brew_logs:
|
||||
- path: /opt/homebrew/var/log/grafana-stdout.log
|
||||
service: grafana
|
||||
stream: stdout
|
||||
- path: /opt/homebrew/var/log/grafana-stderr.log
|
||||
service: grafana
|
||||
stream: stderr
|
||||
- path: /opt/homebrew/var/log/forgejo.log
|
||||
service: forgejo
|
||||
stream: stdout
|
||||
- path: /opt/homebrew/var/log/prometheus.err.log
|
||||
service: prometheus
|
||||
stream: stderr
|
||||
- path: /opt/homebrew/var/log/tailscaled.log
|
||||
service: tailscale
|
||||
stream: stdout
|
||||
- path: /opt/homebrew/var/transmission/transmission-daemon.log
|
||||
service: transmission
|
||||
stream: stdout
|
||||
|
||||
alloy_mcquack_logs:
|
||||
- path: /Users/erichblume/Library/Logs/mcquack.devpi.out.log
|
||||
service: devpi
|
||||
stream: stdout
|
||||
- path: /Users/erichblume/Library/Logs/mcquack.devpi.err.log
|
||||
service: devpi
|
||||
stream: stderr
|
||||
- path: /Users/erichblume/Library/Logs/mcquack.kiwix-serve.out.log
|
||||
service: kiwix
|
||||
stream: stdout
|
||||
- path: /Users/erichblume/Library/Logs/mcquack.kiwix-serve.err.log
|
||||
service: kiwix
|
||||
stream: stderr
|
||||
- path: /Users/erichblume/Library/Logs/mcquack.borgmatic.out.log
|
||||
service: borgmatic
|
||||
stream: stdout
|
||||
- path: /Users/erichblume/Library/Logs/mcquack.borgmatic.err.log
|
||||
service: borgmatic
|
||||
stream: stderr
|
||||
|
||||
# Enable log collection (requires Loki to be running)
|
||||
alloy_collect_logs: true
|
||||
3
ansible/roles/alloy/handlers/main.yml
Normal file
3
ansible/roles/alloy/handlers/main.yml
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
---
|
||||
- name: restart alloy
|
||||
ansible.builtin.command: brew services restart grafana-alloy
|
||||
45
ansible/roles/alloy/tasks/main.yml
Normal file
45
ansible/roles/alloy/tasks/main.yml
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
---
|
||||
# Grafana Alloy installation and configuration
|
||||
# Replaces node_exporter for metrics, adds log collection
|
||||
|
||||
- name: Install grafana-alloy via homebrew
|
||||
community.general.homebrew:
|
||||
name: grafana-alloy
|
||||
state: present
|
||||
|
||||
- name: Ensure alloy config directory exists
|
||||
ansible.builtin.file:
|
||||
path: "{{ alloy_config_dir }}"
|
||||
state: directory
|
||||
mode: '0755'
|
||||
|
||||
- name: Ensure alloy data directory exists
|
||||
ansible.builtin.file:
|
||||
path: "{{ alloy_data_dir }}"
|
||||
state: directory
|
||||
mode: '0755'
|
||||
|
||||
- name: Ensure textfile collector directory exists
|
||||
ansible.builtin.file:
|
||||
path: "{{ alloy_textfile_dir }}"
|
||||
state: directory
|
||||
mode: '0755'
|
||||
|
||||
- name: Deploy alloy configuration
|
||||
ansible.builtin.template:
|
||||
src: config.alloy.j2
|
||||
dest: "{{ alloy_config_dir }}/config.alloy"
|
||||
mode: '0644'
|
||||
notify: restart alloy
|
||||
|
||||
- name: Stop node_exporter service (replaced by alloy)
|
||||
ansible.builtin.command: brew services stop node_exporter
|
||||
register: node_exporter_stop
|
||||
changed_when: "'Stopping' in node_exporter_stop.stdout or 'Successfully stopped' in node_exporter_stop.stdout"
|
||||
failed_when: false
|
||||
|
||||
# Start the Alloy service through Homebrew.
# - Reports "changed" only when brew says it actually started the service.
# - Fails on a non-zero exit code unless brew merely reports that the service
#   is already started, so genuine start-up errors are no longer hidden.
- name: Ensure alloy service is started
  ansible.builtin.command: brew services start grafana-alloy
  register: brew_start
  changed_when: "'Successfully started' in brew_start.stdout"
  failed_when:
    # List items are ANDed: both must hold for the task to fail.
    - brew_start.rc | default(0) != 0
    - "'already started' not in ((brew_start.stdout | default('')) ~ (brew_start.stderr | default('')))"
|
||||
87
ansible/roles/alloy/templates/config.alloy.j2
Normal file
87
ansible/roles/alloy/templates/config.alloy.j2
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
// {{ ansible_managed }}
|
||||
// Grafana Alloy configuration for {{ alloy_instance_label }}
|
||||
// Collects system metrics (replacing node_exporter) and logs
|
||||
|
||||
// ============== METRICS COLLECTION ==============
|
||||
|
||||
// System metrics exporter (replaces node_exporter)
|
||||
prometheus.exporter.unix "system" {
|
||||
textfile {
|
||||
directory = "{{ alloy_textfile_dir }}"
|
||||
}
|
||||
}
|
||||
|
||||
// Scrape system metrics
|
||||
prometheus.scrape "system" {
|
||||
targets = prometheus.exporter.unix.system.targets
|
||||
forward_to = [prometheus.relabel.instance.receiver]
|
||||
scrape_interval = "{{ alloy_scrape_interval }}"
|
||||
}
|
||||
|
||||
// Add instance label to match existing setup
|
||||
prometheus.relabel "instance" {
|
||||
forward_to = [prometheus.remote_write.prometheus.receiver]
|
||||
|
||||
rule {
|
||||
target_label = "instance"
|
||||
replacement = "{{ alloy_instance_label }}"
|
||||
}
|
||||
}
|
||||
|
||||
// Push metrics to Prometheus via remote_write
|
||||
prometheus.remote_write "prometheus" {
|
||||
endpoint {
|
||||
url = "{{ alloy_prometheus_url }}"
|
||||
}
|
||||
}
|
||||
|
||||
{% if alloy_collect_logs %}
|
||||
// ============== LOG COLLECTION ==============
|
||||
|
||||
// Discover log files - brew services
|
||||
local.file_match "brew_logs" {
|
||||
path_targets = [
|
||||
{% for log in alloy_brew_logs %}
|
||||
{__path__ = "{{ log.path }}", service = "{{ log.service }}", stream = "{{ log.stream }}"},
|
||||
{% endfor %}
|
||||
]
|
||||
}
|
||||
|
||||
// Discover log files - mcquack LaunchAgents
|
||||
local.file_match "mcquack_logs" {
|
||||
path_targets = [
|
||||
{% for log in alloy_mcquack_logs %}
|
||||
{__path__ = "{{ log.path }}", service = "{{ log.service }}", stream = "{{ log.stream }}"},
|
||||
{% endfor %}
|
||||
]
|
||||
}
|
||||
|
||||
// Read and forward brew service logs
|
||||
loki.source.file "brew_logs" {
|
||||
targets = local.file_match.brew_logs.targets
|
||||
forward_to = [loki.relabel.add_host.receiver]
|
||||
}
|
||||
|
||||
// Read and forward mcquack service logs
|
||||
loki.source.file "mcquack_logs" {
|
||||
targets = local.file_match.mcquack_logs.targets
|
||||
forward_to = [loki.relabel.add_host.receiver]
|
||||
}
|
||||
|
||||
// Add host label to all logs
|
||||
loki.relabel "add_host" {
|
||||
forward_to = [loki.write.loki.receiver]
|
||||
|
||||
rule {
|
||||
target_label = "host"
|
||||
replacement = "{{ alloy_instance_label }}"
|
||||
}
|
||||
}
|
||||
|
||||
// Write logs to Loki
|
||||
loki.write "loki" {
|
||||
endpoint {
|
||||
url = "{{ alloy_loki_url }}"
|
||||
}
|
||||
}
|
||||
{% endif %}
|
||||
|
|
@ -10,3 +10,11 @@ datasources:
|
|||
url: http://localhost:9090
|
||||
isDefault: true
|
||||
editable: false
|
||||
|
||||
- name: Loki
|
||||
type: loki
|
||||
access: proxy
|
||||
orgId: 1
|
||||
uid: loki
|
||||
url: http://localhost:3100
|
||||
editable: false
|
||||
|
|
|
|||
12
ansible/roles/loki/defaults/main.yml
Normal file
12
ansible/roles/loki/defaults/main.yml
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
---
|
||||
# Loki configuration
|
||||
|
||||
# Server settings
|
||||
loki_http_port: 3100
|
||||
|
||||
# Storage paths
|
||||
loki_data_dir: /opt/homebrew/var/loki
|
||||
loki_config_file: /opt/homebrew/etc/loki-local-config.yaml
|
||||
|
||||
# Retention settings
|
||||
loki_retention_period: 744h # 31 days
|
||||
3
ansible/roles/loki/handlers/main.yml
Normal file
3
ansible/roles/loki/handlers/main.yml
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
---
|
||||
- name: restart loki
|
||||
ansible.builtin.command: brew services restart loki
|
||||
38
ansible/roles/loki/tasks/main.yml
Normal file
38
ansible/roles/loki/tasks/main.yml
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
---
|
||||
# Loki installation and configuration
|
||||
|
||||
- name: Install loki via homebrew
|
||||
community.general.homebrew:
|
||||
name: loki
|
||||
state: present
|
||||
|
||||
- name: Ensure loki data directory exists
|
||||
ansible.builtin.file:
|
||||
path: "{{ loki_data_dir }}"
|
||||
state: directory
|
||||
mode: '0755'
|
||||
|
||||
- name: Ensure loki chunks directory exists
|
||||
ansible.builtin.file:
|
||||
path: "{{ loki_data_dir }}/chunks"
|
||||
state: directory
|
||||
mode: '0755'
|
||||
|
||||
- name: Ensure loki rules directory exists
|
||||
ansible.builtin.file:
|
||||
path: "{{ loki_data_dir }}/rules"
|
||||
state: directory
|
||||
mode: '0755'
|
||||
|
||||
- name: Deploy loki configuration
|
||||
ansible.builtin.template:
|
||||
src: loki-config.yaml.j2
|
||||
dest: "{{ loki_config_file }}"
|
||||
mode: '0644'
|
||||
notify: restart loki
|
||||
|
||||
# Start the Loki service through Homebrew.
# - Reports "changed" only when brew says it actually started the service.
# - Fails on a non-zero exit code unless brew merely reports that the service
#   is already started, so genuine start-up errors are no longer hidden.
- name: Ensure loki service is started
  ansible.builtin.command: brew services start loki
  register: brew_start
  changed_when: "'Successfully started' in brew_start.stdout"
  failed_when:
    # List items are ANDed: both must hold for the task to fail.
    - brew_start.rc | default(0) != 0
    - "'already started' not in ((brew_start.stdout | default('')) ~ (brew_start.stderr | default('')))"
|
||||
53
ansible/roles/loki/templates/loki-config.yaml.j2
Normal file
53
ansible/roles/loki/templates/loki-config.yaml.j2
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
# {{ ansible_managed }}
|
||||
# Loki configuration for single-node deployment
|
||||
|
||||
auth_enabled: false
|
||||
|
||||
server:
|
||||
http_listen_port: {{ loki_http_port }}
|
||||
grpc_listen_port: 9096
|
||||
|
||||
common:
|
||||
instance_addr: 127.0.0.1
|
||||
path_prefix: {{ loki_data_dir }}
|
||||
storage:
|
||||
filesystem:
|
||||
chunks_directory: {{ loki_data_dir }}/chunks
|
||||
rules_directory: {{ loki_data_dir }}/rules
|
||||
replication_factor: 1
|
||||
ring:
|
||||
kvstore:
|
||||
store: inmemory
|
||||
|
||||
query_range:
|
||||
results_cache:
|
||||
cache:
|
||||
embedded_cache:
|
||||
enabled: true
|
||||
max_size_mb: 100
|
||||
|
||||
schema_config:
|
||||
configs:
|
||||
- from: 2024-01-01
|
||||
store: tsdb
|
||||
object_store: filesystem
|
||||
schema: v13
|
||||
index:
|
||||
prefix: index_
|
||||
period: 24h
|
||||
|
||||
storage_config:
|
||||
tsdb_shipper:
|
||||
active_index_directory: {{ loki_data_dir }}/tsdb-index
|
||||
cache_location: {{ loki_data_dir }}/tsdb-cache
|
||||
|
||||
limits_config:
|
||||
retention_period: {{ loki_retention_period }}
|
||||
|
||||
compactor:
|
||||
working_directory: {{ loki_data_dir }}/compactor
|
||||
compaction_interval: 10m
|
||||
retention_enabled: true
|
||||
retention_delete_delay: 2h
|
||||
retention_delete_worker_count: 150
|
||||
delete_request_store: filesystem
|
||||
|
|
@ -1,3 +1,4 @@
|
|||
--config.file /opt/homebrew/etc/prometheus.yml
|
||||
--web.listen-address=0.0.0.0:9090
|
||||
--storage.tsdb.path /opt/homebrew/var/prometheus
|
||||
--web.enable-remote-write-receiver
|
||||
|
|
|
|||
|
|
@ -2,14 +2,10 @@
|
|||
global:
|
||||
scrape_interval: 15s
|
||||
|
||||
# Note: indri metrics are pushed via Alloy remote_write
|
||||
# Sifaka still uses traditional scraping via node_exporter
|
||||
|
||||
scrape_configs:
|
||||
- job_name: "node-exporter-indri"
|
||||
static_configs:
|
||||
- targets: ["localhost:9090"]
|
||||
- targets: ["localhost:9100"]
|
||||
relabel_configs:
|
||||
- target_label: instance
|
||||
replacement: indri
|
||||
- job_name: "node-exporter-sifaka"
|
||||
static_configs:
|
||||
- targets: ["sifaka:9100"]
|
||||
|
|
|
|||
|
|
@ -43,9 +43,10 @@ echo ""
|
|||
|
||||
# Check via SSH that services are running on indri
|
||||
echo "Local services (via launchctl/brew services):"
|
||||
check_service "loki" "ssh indri 'brew services list | grep loki | grep started'"
|
||||
check_service "alloy" "ssh indri 'brew services list | grep grafana-alloy | grep started'"
|
||||
check_service "prometheus" "ssh indri 'brew services list | grep prometheus | grep started'"
|
||||
# Anchor on the service-name column: a bare "grafana" pattern would also match
# the "grafana-alloy" row and report Grafana as running when only Alloy is.
check_service "grafana" "ssh indri 'brew services list | grep -E \"^grafana[[:space:]]\" | grep started'"
|
||||
check_service "node_exporter" "ssh indri 'brew services list | grep node_exporter | grep started'"
|
||||
check_service "transmission" "ssh indri 'brew services list | grep transmission | grep started'"
|
||||
check_service "transmission-metrics" "ssh indri 'launchctl list | grep transmission-metrics | grep -v \"^-\"'"
|
||||
check_service "kiwix-serve" "ssh indri 'launchctl list | grep kiwix | grep -v \"^-\"'"
|
||||
|
|
@ -54,6 +55,7 @@ check_service "devpi" "ssh indri 'launchctl list | grep devpi | grep -v \"^-\"'"
|
|||
|
||||
echo ""
|
||||
echo "HTTP endpoints (via Tailscale):"
|
||||
check_http "Loki" "http://indri:3100/ready"
|
||||
check_http "Prometheus" "http://indri:9090/-/healthy"
|
||||
check_http "Grafana" "http://indri:3000/api/health"
|
||||
check_http "Kiwix" "http://indri:5501/"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue