From 242c1880debe34616520ae0df1804c4b809f2309 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 15 Jan 2026 12:24:13 -0800 Subject: [PATCH] Add Grafana Alloy and Loki for unified observability (#11) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary - Add Grafana Alloy to replace node_exporter for metrics collection - Add Loki for log aggregation and storage - Configure Alloy to collect logs from all services (grafana, forgejo, prometheus, tailscale, transmission, devpi, kiwix, borgmatic) - Update Prometheus to accept metrics via remote_write - Add Loki datasource to Grafana ## Test plan - [ ] Run `mise run provision-indri -- --check --diff` to verify changes - [ ] Apply with `mise run provision-indri` - [ ] Verify services: `mise run indri-services-check` - [ ] Check Grafana Explore with Loki datasource - [ ] Query logs: `{service="grafana"}` - [ ] Verify metrics still flowing to Prometheus dashboards 🤖 Generated with [Claude Code](https://claude.com/claude-code) Reviewed-on: https://forge.tail8d86e.ts.net/eblume/blumeops/pulls/11 --- ansible/playbooks/indri.yml | 6 +- ansible/roles/alloy/defaults/main.yml | 65 +++ ansible/roles/alloy/handlers/main.yml | 3 + ansible/roles/alloy/meta/main.yml | 4 + ansible/roles/alloy/tasks/main.yml | 45 ++ ansible/roles/alloy/templates/config.alloy.j2 | 87 ++++ .../roles/grafana/files/dashboards/loki.json | 460 ++++++++++++++++++ ansible/roles/grafana/meta/main.yml | 4 + .../grafana/templates/datasources.yaml.j2 | 8 + ansible/roles/loki/defaults/main.yml | 12 + ansible/roles/loki/handlers/main.yml | 3 + ansible/roles/loki/meta/main.yml | 2 + ansible/roles/loki/tasks/main.yml | 38 ++ .../roles/loki/templates/loki-config.yaml.j2 | 53 ++ .../prometheus/templates/prometheus.args.j2 | 1 + .../prometheus/templates/prometheus.yml.j2 | 14 +- mise-tasks/indri-services-check | 4 +- 17 files changed, 799 insertions(+), 10 deletions(-) create mode 100644 
ansible/roles/alloy/defaults/main.yml create mode 100644 ansible/roles/alloy/handlers/main.yml create mode 100644 ansible/roles/alloy/meta/main.yml create mode 100644 ansible/roles/alloy/tasks/main.yml create mode 100644 ansible/roles/alloy/templates/config.alloy.j2 create mode 100644 ansible/roles/grafana/files/dashboards/loki.json create mode 100644 ansible/roles/grafana/meta/main.yml create mode 100644 ansible/roles/loki/defaults/main.yml create mode 100644 ansible/roles/loki/handlers/main.yml create mode 100644 ansible/roles/loki/meta/main.yml create mode 100644 ansible/roles/loki/tasks/main.yml create mode 100644 ansible/roles/loki/templates/loki-config.yaml.j2 diff --git a/ansible/playbooks/indri.yml b/ansible/playbooks/indri.yml index 12efb5d..7d14d92 100644 --- a/ansible/playbooks/indri.yml +++ b/ansible/playbooks/indri.yml @@ -2,8 +2,10 @@ - name: Configure indri hosts: indri roles: - - role: node_exporter - tags: node_exporter + - role: loki + tags: loki + - role: alloy + tags: alloy - role: prometheus tags: prometheus - role: grafana diff --git a/ansible/roles/alloy/defaults/main.yml b/ansible/roles/alloy/defaults/main.yml new file mode 100644 index 0000000..d4b83a2 --- /dev/null +++ b/ansible/roles/alloy/defaults/main.yml @@ -0,0 +1,65 @@ +--- +# Grafana Alloy configuration + +# Textfile collector directory (same as node_exporter for compatibility) +alloy_textfile_dir: /opt/homebrew/var/node_exporter/textfile + +# Prometheus remote write endpoint +alloy_prometheus_url: "http://localhost:9090/api/v1/write" + +# Loki endpoint (used in Phase 2) +alloy_loki_url: "http://localhost:3100/loki/api/v1/push" + +# Instance label for metrics +alloy_instance_label: indri + +# Scrape interval +alloy_scrape_interval: "15s" + +# Config paths +alloy_config_dir: /opt/homebrew/etc/grafana-alloy +alloy_data_dir: /opt/homebrew/var/lib/grafana-alloy/data + +# Log paths to collect +alloy_brew_logs: + - path: /opt/homebrew/var/log/grafana-stdout.log + service: grafana + 
stream: stdout + - path: /opt/homebrew/var/log/grafana-stderr.log + service: grafana + stream: stderr + - path: /opt/homebrew/var/log/forgejo.log + service: forgejo + stream: stdout + - path: /opt/homebrew/var/log/prometheus.err.log + service: prometheus + stream: stderr + - path: /opt/homebrew/var/log/tailscaled.log + service: tailscale + stream: stdout + - path: /opt/homebrew/var/transmission/transmission-daemon.log + service: transmission + stream: stdout + +alloy_mcquack_logs: + - path: /Users/erichblume/Library/Logs/mcquack.devpi.out.log + service: devpi + stream: stdout + - path: /Users/erichblume/Library/Logs/mcquack.devpi.err.log + service: devpi + stream: stderr + - path: /Users/erichblume/Library/Logs/mcquack.kiwix-serve.out.log + service: kiwix + stream: stdout + - path: /Users/erichblume/Library/Logs/mcquack.kiwix-serve.err.log + service: kiwix + stream: stderr + - path: /Users/erichblume/Library/Logs/mcquack.borgmatic.out.log + service: borgmatic + stream: stdout + - path: /Users/erichblume/Library/Logs/mcquack.borgmatic.err.log + service: borgmatic + stream: stderr + +# Enable log collection (requires Loki to be running) +alloy_collect_logs: true diff --git a/ansible/roles/alloy/handlers/main.yml b/ansible/roles/alloy/handlers/main.yml new file mode 100644 index 0000000..eeaedb9 --- /dev/null +++ b/ansible/roles/alloy/handlers/main.yml @@ -0,0 +1,3 @@ +--- +- name: restart alloy + ansible.builtin.command: brew services restart grafana-alloy diff --git a/ansible/roles/alloy/meta/main.yml b/ansible/roles/alloy/meta/main.yml new file mode 100644 index 0000000..9e57ded --- /dev/null +++ b/ansible/roles/alloy/meta/main.yml @@ -0,0 +1,4 @@ +--- +dependencies: + - role: prometheus + - role: loki diff --git a/ansible/roles/alloy/tasks/main.yml b/ansible/roles/alloy/tasks/main.yml new file mode 100644 index 0000000..49e3802 --- /dev/null +++ b/ansible/roles/alloy/tasks/main.yml @@ -0,0 +1,45 @@ +--- +# Grafana Alloy installation and configuration +# Replaces 
node_exporter for metrics, adds log collection + +- name: Install grafana-alloy via homebrew + community.general.homebrew: + name: grafana-alloy + state: present + +- name: Ensure alloy config directory exists + ansible.builtin.file: + path: "{{ alloy_config_dir }}" + state: directory + mode: '0755' + +- name: Ensure alloy data directory exists + ansible.builtin.file: + path: "{{ alloy_data_dir }}" + state: directory + mode: '0755' + +- name: Ensure textfile collector directory exists + ansible.builtin.file: + path: "{{ alloy_textfile_dir }}" + state: directory + mode: '0755' + +- name: Deploy alloy configuration + ansible.builtin.template: + src: config.alloy.j2 + dest: "{{ alloy_config_dir }}/config.alloy" + mode: '0644' + notify: restart alloy + +- name: Stop node_exporter service (replaced by alloy) + ansible.builtin.command: brew services stop node_exporter + register: node_exporter_stop + changed_when: "'Stopping' in node_exporter_stop.stdout or 'Successfully stopped' in node_exporter_stop.stdout" + failed_when: false + +- name: Ensure alloy service is started + ansible.builtin.command: brew services start grafana-alloy + register: brew_start + changed_when: "'Successfully started' in brew_start.stdout" + failed_when: false diff --git a/ansible/roles/alloy/templates/config.alloy.j2 b/ansible/roles/alloy/templates/config.alloy.j2 new file mode 100644 index 0000000..a830533 --- /dev/null +++ b/ansible/roles/alloy/templates/config.alloy.j2 @@ -0,0 +1,87 @@ +// {{ ansible_managed }} +// Grafana Alloy configuration for {{ alloy_instance_label }} +// Collects system metrics (replacing node_exporter) and logs + +// ============== METRICS COLLECTION ============== + +// System metrics exporter (replaces node_exporter) +prometheus.exporter.unix "system" { + textfile { + directory = "{{ alloy_textfile_dir }}" + } +} + +// Scrape system metrics +prometheus.scrape "system" { + targets = prometheus.exporter.unix.system.targets + forward_to = 
[prometheus.relabel.instance.receiver] + scrape_interval = "{{ alloy_scrape_interval }}" +} + +// Add instance label to match existing setup +prometheus.relabel "instance" { + forward_to = [prometheus.remote_write.prometheus.receiver] + + rule { + target_label = "instance" + replacement = "{{ alloy_instance_label }}" + } +} + +// Push metrics to Prometheus via remote_write +prometheus.remote_write "prometheus" { + endpoint { + url = "{{ alloy_prometheus_url }}" + } +} + +{% if alloy_collect_logs %} +// ============== LOG COLLECTION ============== + +// Discover log files - brew services +local.file_match "brew_logs" { + path_targets = [ +{% for log in alloy_brew_logs %} + {__path__ = "{{ log.path }}", service = "{{ log.service }}", stream = "{{ log.stream }}"}, +{% endfor %} + ] +} + +// Discover log files - mcquack LaunchAgents +local.file_match "mcquack_logs" { + path_targets = [ +{% for log in alloy_mcquack_logs %} + {__path__ = "{{ log.path }}", service = "{{ log.service }}", stream = "{{ log.stream }}"}, +{% endfor %} + ] +} + +// Read and forward brew service logs +loki.source.file "brew_logs" { + targets = local.file_match.brew_logs.targets + forward_to = [loki.relabel.add_host.receiver] +} + +// Read and forward mcquack service logs +loki.source.file "mcquack_logs" { + targets = local.file_match.mcquack_logs.targets + forward_to = [loki.relabel.add_host.receiver] +} + +// Add host label to all logs +loki.relabel "add_host" { + forward_to = [loki.write.loki.receiver] + + rule { + target_label = "host" + replacement = "{{ alloy_instance_label }}" + } +} + +// Write logs to Loki +loki.write "loki" { + endpoint { + url = "{{ alloy_loki_url }}" + } +} +{% endif %} diff --git a/ansible/roles/grafana/files/dashboards/loki.json b/ansible/roles/grafana/files/dashboards/loki.json new file mode 100644 index 0000000..a935ce5 --- /dev/null +++ b/ansible/roles/grafana/files/dashboards/loki.json @@ -0,0 +1,460 @@ +{ + "annotations": { + "list": [] + }, + "editable": true, 
+ "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1073741824 }, + { "color": "red", "value": 5368709120 } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 6, "x": 0, "y": 0 }, + "id": 1, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum(loki_ingester_memory_chunks_bytes)", + "refId": "A" + } + ], + "title": "Chunks in Memory", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 6, "x": 6, "y": 0 }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum(loki_ingester_memory_chunks)", + "refId": "A" + } + ], + "title": "Active Chunks", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, 
+ "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 6, "x": 12, "y": 0 }, + "id": 3, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum(loki_ingester_streams_created_total) - sum(loki_ingester_streams_removed_total)", + "refId": "A" + } + ], + "title": "Active Streams", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 0.5 }, + { "color": "red", "value": 0.9 } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 6, "x": 18, "y": 0 }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "1 - (go_memstats_heap_idle_bytes{job=\"loki\"} / go_memstats_heap_sys_bytes{job=\"loki\"})", + "refId": "A" + } + ], + "title": "Heap Usage", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + 
"barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [{ "color": "green", "value": null }] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 4 }, + "id": 5, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "rate(loki_distributor_bytes_received_total[5m])", + "legendFormat": "Bytes Received", + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "rate(loki_ingester_chunk_stored_bytes_total[5m])", + "legendFormat": "Bytes Stored", + "refId": "B" + } + ], + "title": "Ingestion Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + 
"thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [{ "color": "green", "value": null }] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 4 }, + "id": 6, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "rate(loki_distributor_lines_received_total[5m])", + "legendFormat": "Lines/sec", + "refId": "A" + } + ], + "title": "Log Lines Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [{ "color": "green", "value": null }] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 12 }, + "id": 7, + "options": { + "legend": { "calcs": ["lastNotNull"], "displayMode": "table", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": 
"sum(loki_ingester_memory_chunks_bytes)", + "legendFormat": "Chunks in Memory", + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "go_memstats_heap_inuse_bytes{job=\"loki\"}", + "legendFormat": "Heap In Use", + "refId": "B" + } + ], + "title": "Memory Usage Over Time", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [{ "color": "green", "value": null }] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 12 }, + "id": 8, + "options": { + "legend": { "calcs": ["sum"], "displayMode": "table", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "increase(loki_ingester_chunk_stored_bytes_total[24h])", + "legendFormat": "Bytes Stored (24h)", + "refId": "A" + } + ], + "title": "Storage Growth (24h rolling)", + "type": "timeseries" + } + ], + "refresh": "1m", + "schemaVersion": 38, + "tags": ["loki", "logs"], + "templating": { + "list": [] + }, + "time": { + "from": "now-24h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Loki", 
+ "uid": "loki-homelab", + "version": 1, + "weekStart": "" +} diff --git a/ansible/roles/grafana/meta/main.yml b/ansible/roles/grafana/meta/main.yml new file mode 100644 index 0000000..9e57ded --- /dev/null +++ b/ansible/roles/grafana/meta/main.yml @@ -0,0 +1,4 @@ +--- +dependencies: + - role: prometheus + - role: loki diff --git a/ansible/roles/grafana/templates/datasources.yaml.j2 b/ansible/roles/grafana/templates/datasources.yaml.j2 index 017d331..838d0a6 100644 --- a/ansible/roles/grafana/templates/datasources.yaml.j2 +++ b/ansible/roles/grafana/templates/datasources.yaml.j2 @@ -10,3 +10,11 @@ datasources: url: http://localhost:9090 isDefault: true editable: false + + - name: Loki + type: loki + access: proxy + orgId: 1 + uid: loki + url: http://localhost:3100 + editable: false diff --git a/ansible/roles/loki/defaults/main.yml b/ansible/roles/loki/defaults/main.yml new file mode 100644 index 0000000..1f7d62e --- /dev/null +++ b/ansible/roles/loki/defaults/main.yml @@ -0,0 +1,12 @@ +--- +# Loki configuration + +# Server settings +loki_http_port: 3100 + +# Storage paths +loki_data_dir: /opt/homebrew/var/loki +loki_config_file: /opt/homebrew/etc/loki-local-config.yaml + +# Retention settings +loki_retention_period: 744h # 31 days diff --git a/ansible/roles/loki/handlers/main.yml b/ansible/roles/loki/handlers/main.yml new file mode 100644 index 0000000..1d6ddb9 --- /dev/null +++ b/ansible/roles/loki/handlers/main.yml @@ -0,0 +1,3 @@ +--- +- name: restart loki + ansible.builtin.command: brew services restart loki diff --git a/ansible/roles/loki/meta/main.yml b/ansible/roles/loki/meta/main.yml new file mode 100644 index 0000000..23d65c7 --- /dev/null +++ b/ansible/roles/loki/meta/main.yml @@ -0,0 +1,2 @@ +--- +dependencies: [] diff --git a/ansible/roles/loki/tasks/main.yml b/ansible/roles/loki/tasks/main.yml new file mode 100644 index 0000000..fd05cdb --- /dev/null +++ b/ansible/roles/loki/tasks/main.yml @@ -0,0 +1,38 @@ +--- +# Loki installation and configuration + 
+- name: Install loki via homebrew + community.general.homebrew: + name: loki + state: present + +- name: Ensure loki data directory exists + ansible.builtin.file: + path: "{{ loki_data_dir }}" + state: directory + mode: '0755' + +- name: Ensure loki chunks directory exists + ansible.builtin.file: + path: "{{ loki_data_dir }}/chunks" + state: directory + mode: '0755' + +- name: Ensure loki rules directory exists + ansible.builtin.file: + path: "{{ loki_data_dir }}/rules" + state: directory + mode: '0755' + +- name: Deploy loki configuration + ansible.builtin.template: + src: loki-config.yaml.j2 + dest: "{{ loki_config_file }}" + mode: '0644' + notify: restart loki + +- name: Ensure loki service is started + ansible.builtin.command: brew services start loki + register: brew_start + changed_when: "'Successfully started' in brew_start.stdout" + failed_when: false diff --git a/ansible/roles/loki/templates/loki-config.yaml.j2 b/ansible/roles/loki/templates/loki-config.yaml.j2 new file mode 100644 index 0000000..2c2c31d --- /dev/null +++ b/ansible/roles/loki/templates/loki-config.yaml.j2 @@ -0,0 +1,53 @@ +# {{ ansible_managed }} +# Loki configuration for single-node deployment + +auth_enabled: false + +server: + http_listen_port: {{ loki_http_port }} + grpc_listen_port: 9096 + +common: + instance_addr: 127.0.0.1 + path_prefix: {{ loki_data_dir }} + storage: + filesystem: + chunks_directory: {{ loki_data_dir }}/chunks + rules_directory: {{ loki_data_dir }}/rules + replication_factor: 1 + ring: + kvstore: + store: inmemory + +query_range: + results_cache: + cache: + embedded_cache: + enabled: true + max_size_mb: 100 + +schema_config: + configs: + - from: 2024-01-01 + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +storage_config: + tsdb_shipper: + active_index_directory: {{ loki_data_dir }}/tsdb-index + cache_location: {{ loki_data_dir }}/tsdb-cache + +limits_config: + retention_period: {{ loki_retention_period }} + 
+compactor: + working_directory: {{ loki_data_dir }}/compactor + compaction_interval: 10m + retention_enabled: true + retention_delete_delay: 2h + retention_delete_worker_count: 150 + delete_request_store: filesystem diff --git a/ansible/roles/prometheus/templates/prometheus.args.j2 b/ansible/roles/prometheus/templates/prometheus.args.j2 index fcfb758..ac09616 100644 --- a/ansible/roles/prometheus/templates/prometheus.args.j2 +++ b/ansible/roles/prometheus/templates/prometheus.args.j2 @@ -1,3 +1,4 @@ --config.file /opt/homebrew/etc/prometheus.yml --web.listen-address=0.0.0.0:9090 --storage.tsdb.path /opt/homebrew/var/prometheus +--web.enable-remote-write-receiver diff --git a/ansible/roles/prometheus/templates/prometheus.yml.j2 b/ansible/roles/prometheus/templates/prometheus.yml.j2 index 6e992bb..1366ae4 100644 --- a/ansible/roles/prometheus/templates/prometheus.yml.j2 +++ b/ansible/roles/prometheus/templates/prometheus.yml.j2 @@ -2,14 +2,14 @@ global: scrape_interval: 15s +# Note: indri system metrics are pushed via Alloy remote_write +# Sifaka still uses traditional scraping via node_exporter + scrape_configs: - - job_name: "node-exporter-indri" - static_configs: - - targets: ["localhost:9090"] - - targets: ["localhost:9100"] - relabel_configs: - - target_label: instance - replacement: indri - job_name: "node-exporter-sifaka" static_configs: - targets: ["sifaka:9100"] + + - job_name: "loki" + static_configs: + - targets: ["localhost:3100"] diff --git a/mise-tasks/indri-services-check b/mise-tasks/indri-services-check index ea794fd..6aa9edf 100755 --- a/mise-tasks/indri-services-check +++ b/mise-tasks/indri-services-check @@ -43,9 +43,10 @@ echo "" # Check via SSH that services are running on indri echo "Local services (via launchctl/brew services):" +check_service "loki" "ssh indri 'brew services list | grep loki | grep started'" +check_service "alloy" "ssh indri 'brew services list | grep grafana-alloy | grep started'" check_service "prometheus" "ssh indri 'brew 
services list | grep prometheus | grep started'" -check_service "grafana" "ssh indri 'brew services list | grep grafana | grep started'" +check_service "grafana" "ssh indri 'brew services list | grep \"^grafana \" | grep started'" -check_service "node_exporter" "ssh indri 'brew services list | grep node_exporter | grep started'" check_service "transmission" "ssh indri 'brew services list | grep transmission | grep started'" check_service "transmission-metrics" "ssh indri 'launchctl list | grep transmission-metrics | grep -v \"^-\"'" check_service "kiwix-serve" "ssh indri 'launchctl list | grep kiwix | grep -v \"^-\"'" @@ -54,6 +55,7 @@ check_service "devpi" "ssh indri 'launchctl list | grep devpi | grep -v \"^-\"'" echo "" echo "HTTP endpoints (via Tailscale):" +check_http "Loki" "http://indri:3100/ready" check_http "Prometheus" "http://indri:9090/-/healthy" check_http "Grafana" "http://indri:3000/api/health" check_http "Kiwix" "http://indri:5501/"