C0: review-compliance-reports — summarize image and IaC scans

Previously only the K8s CIS in-cluster scan was processed; the weekly
container-image and IaC Prowler scans were running on schedule but never
reviewed. Now each scan gets its own status / severity / week-over-week
delta, with top-N grouped tables (by check ID and resource) for the
high-volume image and IaC outputs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Erich Blume 2026-04-27 12:18:06 -07:00
commit 718e0a0043
2 changed files with 321 additions and 205 deletions

View file

@ -0,0 +1 @@
`review-compliance-reports` now also fetches and summarizes the weekly Prowler container-image and IaC scans (previously only the K8s CIS in-cluster scan was processed). For each scan it shows status counts, severity breakdown, week-over-week delta, and — for the high-volume image/IaC scans — top-N tables grouped by check ID and resource instead of per-finding listings.

View file

@ -9,23 +9,26 @@
"""Fetch and summarize compliance reports from sifaka. """Fetch and summarize compliance reports from sifaka.
Covers: Covers:
- Prowler K8s CIS: CSV-based, full analysis with delta tracking - Prowler K8s CIS (in-cluster): per-finding detail
- Prowler container image scans: grouped by check + resource
- Prowler IaC manifest scans: grouped by check + resource
- Kingfisher secret scanning: TODO — pending upstream JSON/CSV output - Kingfisher secret scanning: TODO — pending upstream JSON/CSV output
support (currently HTML-only; contribute from spork) support (currently HTML-only; contribute from spork)
For Prowler, copies the two most recent K8s CIS reports, parses them, For each Prowler scan, copies the two most recent CSV reports, parses
and displays: them, and displays:
1. Overall status (pass/fail/manual/muted counts) 1. Overall status (pass/fail/manual/muted counts)
2. Unmuted failures by severity 2. Unmuted failures by severity
3. Delta from the previous report (new vs resolved) 3. Delta from the previous report (new vs resolved)
4. Actionable unmuted failures with details 4. Actionable unmuted failures (per-finding for in-cluster; grouped
by check ID and resource for image/IaC because they have far too
many findings to list individually)
This is the primary tool for the weekly compliance report review. This is the primary tool for the weekly compliance report review.
""" """
import csv import csv
import subprocess import subprocess
import sys
import tempfile import tempfile
from collections import Counter from collections import Counter
from pathlib import Path from pathlib import Path
@ -36,7 +39,12 @@ from rich.console import Console
from rich.panel import Panel from rich.panel import Panel
from rich.table import Table from rich.table import Table
REPORT_BASE = "sifaka:/volume1/reports/prowler" PROWLER_SCANS: list[tuple[str, str, bool]] = [
# (label, sifaka base path, group_findings)
("K8s CIS (In-Cluster)", "/volume1/reports/prowler", False),
("Container Images", "/volume1/reports/prowler-images", True),
("IaC (manifests)", "/volume1/reports/prowler-iac", True),
]
console = Console() console = Console()
@ -52,18 +60,18 @@ def scp(remote: str, local: str) -> bool:
return result.returncode == 0 return result.returncode == 0
def list_reports() -> list[str]: def list_reports(base: str) -> list[str]:
"""List Prowler CSV reports on sifaka, sorted by embedded timestamp.""" """List Prowler CSV reports under `base` on sifaka, sorted by timestamp."""
result = subprocess.run( result = subprocess.run(
["ssh", "sifaka", "find /volume1/reports/prowler/ -name '*.csv' " ["ssh", "sifaka", f"find {base}/ -name '*.csv' "
"-not -path '*/compliance/*' -not -name '@*'"], "-not -path '*/compliance/*' -not -name '@*'"],
capture_output=True, capture_output=True,
text=True, text=True,
timeout=15, timeout=15,
) )
if result.returncode != 0: if result.returncode != 0:
console.print("[bold red]Failed to list reports on sifaka[/bold red]") console.print(f"[bold red]Failed to list reports under {base}[/bold red]")
raise typer.Exit(code=1) return []
csvs = [p.strip() for p in result.stdout.strip().splitlines() if p.strip()] csvs = [p.strip() for p in result.stdout.strip().splitlines() if p.strip()]
# Sort by the timestamp embedded in the filename (e.g. 20260405030007) # Sort by the timestamp embedded in the filename (e.g. 20260405030007)
@ -306,40 +314,59 @@ def run_node_verification(console: Console) -> None:
console.print() console.print()
def main( SEVERITY_STYLE = {
full: Annotated[ "critical": "bold red",
bool, typer.Option(help="Show all unmuted failures, not just new ones") "high": "red",
] = False, "medium": "yellow",
show_muted: Annotated[ }
bool, typer.Option(help="Also show muted failures")
] = False,
) -> None:
csvs = list_reports()
if not csvs:
console.print("[bold red]No Prowler CSV reports found on sifaka[/bold red]")
raise typer.Exit(code=1)
with tempfile.TemporaryDirectory() as tmpdir:
# Fetch the two most recent reports def _sev_style(sev: str) -> str:
return SEVERITY_STYLE.get(sev.lower(), "")
def summarize_report(
label: str,
base: str,
tmpdir: str,
*,
show_muted: bool = False,
group_findings: bool = False,
) -> None:
"""Fetch and summarize the latest Prowler report under `base`.
When `group_findings` is True, top-N CHECK_ID and RESOURCE_NAME tables
are shown instead of a per-finding detail table — appropriate for
image and IaC scans that produce thousands of findings.
"""
console.rule(f"[bold]{label}[/bold]")
csvs = list_reports(base)
if not csvs:
console.print(
f"[bold yellow]{label}: no Prowler CSV reports found "
f"under {base}[/bold yellow]"
)
console.print()
return
safe = "".join(c if c.isalnum() else "_" for c in label.lower())
latest_remote = csvs[-1] latest_remote = csvs[-1]
latest_local = Path(tmpdir) / "latest.csv" latest_local = Path(tmpdir) / f"{safe}_latest.csv"
console.print(f"[dim]Fetching {latest_remote}...[/dim]") console.print(f"[dim]Fetching {latest_remote}...[/dim]")
if not scp(f"sifaka:{latest_remote}", str(latest_local)): if not scp(f"sifaka:{latest_remote}", str(latest_local)):
console.print("[bold red]Failed to copy latest report[/bold red]") console.print(f"[bold red]Failed to copy {latest_remote}[/bold red]")
raise typer.Exit(code=1) return
prev_local = None prev_local: Path | None = None
if len(csvs) >= 2: if len(csvs) >= 2:
prev_remote = csvs[-2] prev_remote = csvs[-2]
prev_local = Path(tmpdir) / "prev.csv" prev_path = Path(tmpdir) / f"{safe}_prev.csv"
console.print(f"[dim]Fetching {prev_remote}...[/dim]") console.print(f"[dim]Fetching {prev_remote}...[/dim]")
if not scp(f"sifaka:{prev_remote}", str(prev_local)): if scp(f"sifaka:{prev_remote}", str(prev_path)):
prev_local = None prev_local = prev_path
latest = parse_findings(load_csv(str(latest_local))) latest = parse_findings(load_csv(str(latest_local)))
# Extract report date from filename
report_name = Path(latest_remote).stem report_name = Path(latest_remote).stem
console.print() console.print()
@ -358,7 +385,6 @@ def main(
f"[{style}]{count}[/{style}]" if style else str(count), f"[{style}]{count}[/{style}]" if style else str(count),
) )
fail_count = len(latest["fails"])
muted_count = len(latest["muted"]) muted_count = len(latest["muted"])
unmuted_count = len(latest["unmuted"]) unmuted_count = len(latest["unmuted"])
status_table.add_row("", "") status_table.add_row("", "")
@ -385,18 +411,11 @@ def main(
sev_table.add_column("Severity") sev_table.add_column("Severity")
sev_table.add_column("Count", justify="right") sev_table.add_column("Count", justify="right")
for sev, count in Counter( for sev, count in sorted(
r["SEVERITY"] for r in latest["unmuted"] Counter(r["SEVERITY"] for r in latest["unmuted"]).items(),
).most_common(): key=lambda kv: severity_sort({"SEVERITY": kv[0]}),
style = ( ):
"bold red" style = _sev_style(sev)
if sev == "critical"
else "red"
if sev == "high"
else "yellow"
if sev == "medium"
else ""
)
sev_table.add_row( sev_table.add_row(
f"[{style}]{sev}[/{style}]" if style else sev, f"[{style}]{sev}[/{style}]" if style else sev,
f"[{style}]{count}[/{style}]" if style else str(count), f"[{style}]{count}[/{style}]" if style else str(count),
@ -424,7 +443,7 @@ def main(
f"[green]Resolved: {len(resolved_keys)}[/green]", f"[green]Resolved: {len(resolved_keys)}[/green]",
f"[red]New: {len(new_keys)}[/red]" f"[red]New: {len(new_keys)}[/red]"
if new_keys if new_keys
else f"[green]New: 0[/green]", else "[green]New: 0[/green]",
] ]
console.print( console.print(
@ -436,6 +455,10 @@ def main(
) )
console.print() console.print()
# For grouped scans the new/resolved listings are too noisy
# (potentially thousands of lines). Skip the listings; the count
# is in the panel above and detail is in the grouped tables.
if not group_findings:
if new_keys: if new_keys:
console.print("[bold red]New Unmuted Failures:[/bold red]") console.print("[bold red]New Unmuted Failures:[/bold red]")
for k in sorted(new_keys): for k in sorted(new_keys):
@ -456,42 +479,12 @@ def main(
) )
console.print() console.print()
# --- Unmuted failure details --- # --- Unmuted failure details (grouped or per-finding) ---
findings_to_show = latest["unmuted"] if full else [] if latest["unmuted"]:
if not full and latest["unmuted"]: if group_findings:
findings_to_show = latest["unmuted"] _print_grouped_findings(latest["unmuted"])
else:
if findings_to_show: _print_findings_detail(latest["unmuted"])
detail_table = Table(
show_header=True,
header_style="bold",
title="Unmuted Failures — Action Needed",
)
detail_table.add_column("Severity")
detail_table.add_column("Check")
detail_table.add_column("Resource")
detail_table.add_column("Detail", max_width=60)
for r in sorted(findings_to_show, key=severity_sort):
sev = r["SEVERITY"]
style = (
"bold red"
if sev == "critical"
else "red"
if sev == "high"
else "yellow"
if sev == "medium"
else ""
)
detail_table.add_row(
f"[{style}]{sev}[/{style}]" if style else sev,
r["CHECK_ID"],
r.get("RESOURCE_NAME", ""),
r["STATUS_EXTENDED"][:60],
)
console.print(detail_table)
console.print()
# --- Muted findings summary --- # --- Muted findings summary ---
if show_muted and latest["muted"]: if show_muted and latest["muted"]:
@ -509,9 +502,14 @@ def main(
muted_groups[(r["SEVERITY"], r["CHECK_ID"])] += 1 muted_groups[(r["SEVERITY"], r["CHECK_ID"])] += 1
for (sev, check), count in sorted( for (sev, check), count in sorted(
muted_groups.items(), key=lambda x: severity_sort({"SEVERITY": x[0][0]}) muted_groups.items(),
key=lambda x: severity_sort({"SEVERITY": x[0][0]}),
): ):
muted_table.add_row(f"[dim]{sev}[/dim]", f"[dim]{check}[/dim]", f"[dim]{count}[/dim]") muted_table.add_row(
f"[dim]{sev}[/dim]",
f"[dim]{check}[/dim]",
f"[dim]{count}[/dim]",
)
console.print(muted_table) console.print(muted_table)
console.print() console.print()
@ -521,7 +519,7 @@ def main(
console.print( console.print(
Panel( Panel(
"[bold green]All clear.[/bold green] No unmuted failures.", "[bold green]All clear.[/bold green] No unmuted failures.",
title="Prowler Verdict", title=f"{label} Verdict",
border_style="green", border_style="green",
) )
) )
@ -530,12 +528,129 @@ def main(
Panel( Panel(
f"[bold yellow]{len(latest['unmuted'])} unmuted failure(s) " f"[bold yellow]{len(latest['unmuted'])} unmuted failure(s) "
f"need triage.[/bold yellow]\n\n" f"need triage.[/bold yellow]\n\n"
"For each: remediate (fix the pod spec) or mute " "For each: remediate or mute "
"(add to mutelist + compensating control).", "(add to mutelist + compensating control).",
title="Prowler Verdict", title=f"{label} Verdict",
border_style="yellow", border_style="yellow",
) )
) )
console.print()
def _print_findings_detail(unmuted: list[dict]) -> None:
    """Print one table row per unmuted failure, ordered by `severity_sort`.

    Meant for scans whose finding count is small enough to read row by row.
    Each row shows the severity (colored via `_sev_style`), the check ID,
    the resource name (blank when the key is absent) and the first 60
    characters of the extended status text.
    """
    table = Table(
        title="Unmuted Failures — Action Needed",
        show_header=True,
        header_style="bold",
    )
    for heading in ("Severity", "Check", "Resource"):
        table.add_column(heading)
    table.add_column("Detail", max_width=60)

    for finding in sorted(unmuted, key=severity_sort):
        severity = finding["SEVERITY"]
        markup = _sev_style(severity)
        # Only wrap in markup tags when a style is configured for this label.
        severity_cell = (
            f"[{markup}]{severity}[/{markup}]" if markup else severity
        )
        table.add_row(
            severity_cell,
            finding["CHECK_ID"],
            finding.get("RESOURCE_NAME", ""),
            # Pre-truncate to the same width the Detail column is capped at.
            finding["STATUS_EXTENDED"][:60],
        )

    console.print(table)
    console.print()
def _worst_severity(rows: list[dict]) -> str:
    """Return the severity label in `rows` that ranks first under `severity_sort`.

    An empty `rows` yields the empty string.
    NOTE(review): assumes `severity_sort` orders more severe labels first,
    so the minimum is the "worst" — confirm against its definition.
    """
    if not rows:
        return ""

    def rank(label: str):
        # `severity_sort` takes a finding-shaped dict, so wrap the bare label.
        return severity_sort({"SEVERITY": label})

    labels = (row["SEVERITY"] for row in rows)
    return min(labels, key=rank)
def _print_grouped_findings(unmuted: list[dict], top_n: int = 15) -> None:
    """Print two top-N summary tables: by CHECK_ID, then by RESOURCE_NAME.

    Used in place of a per-finding table when a scan produces too many
    findings to list individually. Each row shows the group's worst
    severity, its name, and how many unmuted findings fall into it.
    Findings with a missing or empty RESOURCE_NAME are grouped under
    "(no resource)". Resource names are cut to 80 characters for display.
    """
    per_check: dict[str, list[dict]] = {}
    per_resource: dict[str, list[dict]] = {}
    for finding in unmuted:
        per_check.setdefault(finding["CHECK_ID"], []).append(finding)
        resource_key = finding.get("RESOURCE_NAME", "") or "(no resource)"
        per_resource.setdefault(resource_key, []).append(finding)

    def render(
        title: str,
        name_heading: str,
        groups: dict[str, list[dict]],
        name_limit: int | None = None,
    ) -> None:
        """Print one grouped table followed by a blank line."""
        table = Table(show_header=True, header_style="bold", title=title)
        table.add_column("Worst Sev")
        table.add_column(name_heading)
        table.add_column("Count", justify="right")

        # Largest groups first; the stable sort keeps first-seen order on ties.
        ranked = sorted(
            groups.items(), key=lambda item: len(item[1]), reverse=True
        )
        for name, members in ranked[:top_n]:
            worst = _worst_severity(members)
            markup = _sev_style(worst)
            table.add_row(
                f"[{markup}]{worst}[/{markup}]" if markup else worst,
                name[:name_limit],  # a limit of None leaves the name whole
                str(len(members)),
            )

        console.print(table)
        console.print()

    render(
        f"Top {top_n} Checks by Unmuted Finding Count",
        "Check ID",
        per_check,
    )
    render(
        f"Top {top_n} Resources by Unmuted Finding Count",
        "Resource",
        per_resource,
        name_limit=80,
    )
def main(
full: Annotated[
bool, typer.Option(help="(reserved) currently a no-op; all unmuted failures already shown")
] = False,
show_muted: Annotated[
bool, typer.Option(help="Also show muted failures")
] = False,
) -> None:
del full # historical flag, kept for backwards compatibility
with tempfile.TemporaryDirectory() as tmpdir:
for label, base, group in PROWLER_SCANS:
summarize_report(
label,
base,
tmpdir,
show_muted=show_muted,
group_findings=group,
)
# --- Node-level MANUAL check verification --- # --- Node-level MANUAL check verification ---
# Compensating control: node-config-automated-verification # Compensating control: node-config-automated-verification