diff --git a/docs/access-map-viewer/index.html b/docs/access-map-viewer/index.html
index 3df851c..478cf8e 100644
--- a/docs/access-map-viewer/index.html
+++ b/docs/access-map-viewer/index.html
@@ -243,12 +243,12 @@
/* Upload */
.upload-area {
- padding: 32px;
+ padding: 36px 28px 24px;
text-align: center;
background: var(--surface-muted);
cursor: pointer;
transition: all 0.2s ease;
- border-bottom: 1px solid var(--border);
+ border: 1px dashed var(--border);
border-radius: var(--radius);
}
.upload-area:hover,
@@ -256,9 +256,95 @@
background: var(--brand-soft);
border-color: var(--brand);
}
- .upload-icon { font-size: 48px; margin-bottom: 12px; opacity: 0.8; }
- .upload-text { font-size: 16px; font-weight: 500; }
- .upload-sub { color: var(--text-muted); margin-top: 4px; }
+ .upload-icon {
+ width: 68px;
+ height: 68px;
+ margin: 0 auto 16px;
+ border-radius: 18px;
+ display: flex;
+ align-items: center;
+ justify-content: center;
+ font-size: 34px;
+ background: var(--surface);
+ border: 1px solid var(--border);
+ box-shadow: var(--shadow-sm);
+ opacity: 0.9;
+ }
+ .upload-text {
+ font-size: 28px;
+ line-height: 1.2;
+ font-weight: 700;
+ letter-spacing: -0.02em;
+ color: var(--text-main);
+ max-width: 420px;
+ margin: 0 auto;
+ }
+ .upload-sub {
+ color: var(--text-muted);
+ margin-top: 8px;
+ font-size: 15px;
+ line-height: 1.5;
+ max-width: 560px;
+ margin-left: auto;
+ margin-right: auto;
+ }
+ .upload-action-row {
+ display: flex;
+ justify-content: center;
+ align-items: center;
+ gap: 12px;
+ flex-wrap: wrap;
+ margin-top: 18px;
+ }
+ .upload-primary {
+ display: inline-flex;
+ align-items: center;
+ justify-content: center;
+ padding: 10px 16px;
+ border-radius: 999px;
+ background: var(--brand);
+ color: #fff;
+ font-size: 13px;
+ font-weight: 700;
+ letter-spacing: 0.01em;
+ box-shadow: var(--shadow-sm);
+ }
+ .upload-action-hint {
+ font-size: 12px;
+ color: var(--text-muted);
+ font-weight: 600;
+ }
+ .upload-help-grid {
+ display: grid;
+ grid-template-columns: repeat(auto-fit, minmax(160px, 1fr));
+ gap: 12px;
+ margin-top: 22px;
+ text-align: left;
+ }
+ .upload-help-card {
+ background: var(--surface);
+ border: 1px solid var(--border);
+ border-radius: 12px;
+ padding: 14px 14px 12px;
+ box-shadow: var(--shadow-sm);
+ }
+ .upload-help-label {
+ font-size: 11px;
+ font-weight: 700;
+ text-transform: uppercase;
+ letter-spacing: 0.06em;
+ color: var(--brand-dark);
+ margin-bottom: 6px;
+ }
+ .upload-help-text {
+ font-size: 13px;
+ line-height: 1.5;
+ color: var(--text-muted);
+ }
+ .upload-help-text strong {
+ color: var(--text-main);
+ font-weight: 700;
+ }
/* Metrics */
.metrics-grid {
@@ -639,13 +725,120 @@
flex-shrink: 0;
}
- #status-chart {
+ #status-chart,
+ #source-chart {
background: var(--surface-muted);
border: 1px solid var(--border);
border-radius: 10px;
box-shadow: var(--shadow-sm);
}
+ .dashboard-breakdown {
+ padding: 0 20px 20px;
+ }
+ .breakdown-card {
+ background: var(--surface);
+ border: 1px solid var(--border);
+ border-radius: var(--radius);
+ box-shadow: var(--shadow-sm);
+ overflow: hidden;
+ }
+ .breakdown-header {
+ display: flex;
+ align-items: center;
+ justify-content: space-between;
+ gap: 12px;
+ padding: 14px 16px;
+ border-bottom: 1px solid var(--border);
+ background: var(--surface-strong);
+ }
+ .breakdown-title {
+ font-size: 14px;
+ font-weight: 700;
+ color: var(--text-main);
+ }
+ .breakdown-sub {
+ font-size: 12px;
+ color: var(--text-muted);
+ margin-top: 3px;
+ }
+ .breakdown-controls {
+ display: flex;
+ align-items: center;
+ gap: 10px;
+ flex-wrap: wrap;
+ }
+ .breakdown-toggle {
+ display: inline-flex;
+ align-items: center;
+ gap: 8px;
+ font-size: 12px;
+ color: var(--text-main);
+ font-weight: 600;
+ cursor: pointer;
+ user-select: none;
+ }
+ .breakdown-table {
+ width: 100%;
+ border-collapse: collapse;
+ font-size: 13px;
+ }
+ .breakdown-table th,
+ .breakdown-table td {
+ padding: 10px 14px;
+ border-bottom: 1px solid var(--border);
+ text-align: left;
+ vertical-align: top;
+ }
+ .breakdown-table th {
+ font-size: 11px;
+ text-transform: uppercase;
+ letter-spacing: 0.05em;
+ color: var(--text-muted);
+ font-weight: 700;
+ background: var(--surface);
+ }
+ .breakdown-sort-btn {
+ border: 0;
+ background: transparent;
+ padding: 0;
+ margin: 0;
+ display: inline-flex;
+ align-items: center;
+ gap: 6px;
+ color: inherit;
+ font: inherit;
+ text-transform: inherit;
+ letter-spacing: inherit;
+ font-weight: inherit;
+ cursor: pointer;
+ }
+ .breakdown-sort-indicator {
+ font-size: 11px;
+ color: var(--text-muted);
+ min-width: 10px;
+ text-align: center;
+ }
+ .breakdown-table tr:last-child td {
+ border-bottom: 0;
+ }
+ .breakdown-name {
+ font-weight: 700;
+ color: var(--text-main);
+ word-break: break-word;
+ }
+ .breakdown-count {
+ font-weight: 700;
+ color: var(--text-main);
+ white-space: nowrap;
+ }
+ .breakdown-empty {
+ padding: 24px 16px;
+ text-align: center;
+ color: var(--text-muted);
+ font-size: 13px;
+ }
+
/* Findings table */
.table-container { width: 100%; overflow-x: auto; }
.table { width: 100%; border-collapse: collapse; font-size: 13px; }
@@ -1238,9 +1431,9 @@
Access Map & Findings
-
+
Light Mode
- Load New Report
+ Clear and Load New Report(s)
@@ -1257,9 +1450,30 @@
π
-
Drag & drop reports here
-
...or click to choose files
-
Your files stay in the browser. Supported inputs: Kingfisher JSON/JSONL, Gitleaks JSON, and TruffleHog JSON/JSONL. Multiple files are merged and deduplicated.
+
Load one or more reports
+
Drop files anywhere in this card, or click to choose them from disk.
+
+ Choose Reports
+ JSON and JSONL only
+
+
+
+
Formats
+
Kingfisher JSON/JSONL, Gitleaks JSON, and TruffleHog JSON/JSONL.
+
+
+
Merge More
+
Choose multiple files at once, or drag in more report files after loading to merge them into the same view.
+
+
+
Deduplication
+
Kingfisher reports are already deduplicated. For imported Gitleaks and TruffleHog reports, the viewer attempts deduplication by finding fingerprint: native fingerprints when present, otherwise a synthetic fingerprint built from the tool, detector, and secret identity, with path/line/snippet fallback when needed.
+
+
+
Privacy
+
Everything stays in your browser. Files are not uploaded anywhere.
+
+
@@ -1343,7 +1557,7 @@
@@ -1353,6 +1567,55 @@
+
+
+
+
@@ -1394,6 +1657,12 @@
Download JSON
Export CSV
+
+ All sources
+ Kingfisher
+ Gitleaks
+ TruffleHog
+
All validation states
Active Credential
@@ -1559,10 +1828,15 @@
let filteredAccessMapView = [];
let currentFilter = "";
let validationFilter = "all";
+ let sourceFilter = "all";
let pageSize = 10;
let currentPage = 1;
let sortField = "rule";
let sortDirection = "asc";
+ let detectorBreakdownSortField = "count";
+ let detectorBreakdownSortDirection = "desc";
+ let detectorBreakdownActiveOnly = false;
+ let detectorBreakdownPage = 1;
let autoCollapsedAccessMap = false;
let currentDetailFinding = null;
let scanMetadata = {};
@@ -1579,8 +1853,14 @@
const reportScopeSelect = document.getElementById("report-scope");
const downloadAccessReportBtn = document.getElementById("download-access-report");
const exportFindingRiskBtn = document.getElementById("fd-export-risk-report");
+ const sourceDetectorCharts = document.getElementById("source-detector-charts");
+ const detectorBreakdownActiveOnlyToggle = document.getElementById("detector-breakdown-active-only");
+ const detectorBreakdownPrev = document.getElementById("detector-breakdown-prev");
+ const detectorBreakdownNext = document.getElementById("detector-breakdown-next");
+ const detectorBreakdownPageInfo = document.getElementById("detector-breakdown-page-info");
const searchInput = document.getElementById("search-input");
+ const sourceSelect = document.getElementById("source-filter");
const validationSelect = document.getElementById("validation-filter");
const rowsSelect = document.getElementById("rows-select");
const pagePrev = document.getElementById("page-prev");
@@ -1600,6 +1880,9 @@
const navButtons = document.querySelectorAll("[data-view-target]");
const statusChartCanvas = document.getElementById("status-chart");
const statusLegend = document.getElementById("status-legend");
+ const sourceChartCanvas = document.getElementById("source-chart");
+ const sourceLegend = document.getElementById("source-legend");
+ const detectorBreakdown = document.getElementById("detector-breakdown");
const viewRegistry = {
"view-dashboard": document.getElementById("view-dashboard"),
"view-access": document.getElementById("view-access"),
@@ -1618,9 +1901,20 @@
setTheme(localStorage.getItem(THEME_KEY));
- dropZone.addEventListener("click", () => fileInput.click());
+ function hasLoadedReportData() {
+ return findings.length > 0 || accessMap.length > 0 || rawData !== null;
+ }
+
+ function openReplaceFilePicker() {
+ fileInput.value = "";
+ fileInput.click();
+ }
+
+ dropZone.addEventListener("click", () => openReplaceFilePicker());
fileInput.addEventListener("change", (e) => {
- if (e.target.files.length) processFiles(Array.from(e.target.files));
+ if (e.target.files.length) {
+ processFiles(Array.from(e.target.files), { append: false });
+ }
});
dropZone.addEventListener("dragover", (e) => {
e.preventDefault();
@@ -1633,7 +1927,9 @@
dropZone.addEventListener("drop", (e) => {
e.preventDefault();
dropZone.classList.remove("active");
- if (e.dataTransfer.files.length) processFiles(Array.from(e.dataTransfer.files));
+ if (e.dataTransfer.files.length) {
+ processFiles(Array.from(e.dataTransfer.files), { append: hasLoadedReportData() });
+ }
});
navButtons.forEach((btn) => {
@@ -1644,7 +1940,7 @@
resetButton.addEventListener("click", () => {
const confirmReset = confirm(
- "Loading a new report will clear the currently loaded data (your file stays on disk). Continue?",
+ "Clearing and loading a new report will discard the currently loaded data from the viewer (your files stay on disk). Continue?",
);
if (confirmReset) {
@@ -1664,6 +1960,36 @@
renderFindingsTable();
});
+ sourceSelect.addEventListener("change", (e) => {
+ sourceFilter = e.target.value || "all";
+ currentPage = 1;
+ renderFindingsTable();
+ });
+
+ if (detectorBreakdownActiveOnlyToggle) {
+ detectorBreakdownActiveOnlyToggle.addEventListener("change", (e) => {
+ detectorBreakdownActiveOnly = Boolean(e.target.checked);
+ detectorBreakdownPage = 1;
+ renderDetectorBreakdown();
+ });
+ }
+
+ if (detectorBreakdownPrev) {
+ detectorBreakdownPrev.addEventListener("click", () => {
+ if (detectorBreakdownPage > 1) {
+ detectorBreakdownPage--;
+ renderDetectorBreakdown();
+ }
+ });
+ }
+
+ if (detectorBreakdownNext) {
+ detectorBreakdownNext.addEventListener("click", () => {
+ detectorBreakdownPage++;
+ renderDetectorBreakdown();
+ });
+ }
+
rowsSelect.addEventListener("change", (e) => {
pageSize = parseInt(e.target.value, 10) || 10;
currentPage = 1;
@@ -2188,11 +2514,11 @@
const firstEntry = entries.find((item) => item && typeof item === "object") || {};
const notes = toolName === "TruffleHog"
? [
- "Imported from TruffleHog JSON. Kingfisher import deduplicates repeated TruffleHog findings by secret identity. Only findings TruffleHog marked verified are shown as active credentials; all other findings stay in Not Attempted.",
+ "Imported from TruffleHog JSON. Native Kingfisher reports are already deduplicated; for TruffleHog imports, the viewer attempts deduplication by finding fingerprint, preferring detector plus secret identity and falling back to path, line, and snippet when needed. Only findings TruffleHog marked verified are shown as active credentials; all other findings stay in Not Attempted.",
"Imported reports do not include Kingfisher access-map data, validate commands, or revoke commands. Re-scan with Kingfisher and --access-map for full enrichment.",
]
: [
- "Imported from Gitleaks JSON. Findings are normalized for browsing, but validation, revoke commands, and access-map blast radius are unavailable in the imported report.",
+ "Imported from Gitleaks JSON. Native Kingfisher reports are already deduplicated; for Gitleaks imports, the viewer attempts deduplication by finding fingerprint, preferring rule plus secret identity and falling back to path, line, column, and snippet when needed.",
"Re-scan with Kingfisher and --access-map to add validation context and blast-radius mapping.",
];
const target = String(firstNonEmpty(
@@ -2282,7 +2608,7 @@
return merged;
}
- function processFiles(files) {
+ function processFiles(files, { append = false } = {}) {
const validFiles = files.filter((f) => {
const name = f.name.toLowerCase();
return name.endsWith(".json") || name.endsWith(".jsonl");
@@ -2295,8 +2621,8 @@
}
const label = validFiles.length === 1
- ? 'Processing "' + validFiles[0].name + '"β¦'
- : "Processing " + validFiles.length + " filesβ¦";
+ ? (append ? 'Importing "' + validFiles[0].name + '"β¦' : 'Processing "' + validFiles[0].name + '"β¦')
+ : (append ? "Importing " + validFiles.length + " additional filesβ¦" : "Processing " + validFiles.length + " filesβ¦");
loaderText.textContent = label;
loader.classList.remove("hidden");
errorMsg.classList.add("hidden");
@@ -2313,7 +2639,7 @@
completed++;
if (completed === validFiles.length) {
try {
- parseAndRenderMultiple(texts);
+ parseAndRenderMultiple(texts, { append });
} catch (err) {
console.error(err);
errorMsg.textContent = "Error parsing files: " + err.message;
@@ -2333,7 +2659,7 @@
loader.classList.add("hidden");
} else {
try {
- parseAndRenderMultiple(validTexts);
+ parseAndRenderMultiple(validTexts, { append });
} catch (err) {
console.error(err);
errorMsg.textContent = "Error parsing files: " + err.message;
@@ -2382,15 +2708,20 @@
currentFilter = "";
validationFilter = "all";
+ sourceFilter = "all";
pageSize = 10;
currentPage = 1;
sortField = "rule";
sortDirection = "asc";
+ detectorBreakdownActiveOnly = false;
+ detectorBreakdownPage = 1;
searchInput.value = "";
validationSelect.value = "all";
+ sourceSelect.value = "all";
rowsSelect.value = "10";
treeSearch.value = "";
+ if (detectorBreakdownActiveOnlyToggle) detectorBreakdownActiveOnlyToggle.checked = false;
setAccessMapCollapsed(true, { auto: true });
if (amToggle) amToggle.disabled = true;
@@ -2427,29 +2758,30 @@
fileInput.value = "";
if (promptFilePicker) {
- fileInput.click();
+ openReplaceFilePicker();
}
}
- function parseAndRenderMultiple(texts) {
+ function parseAndRenderMultiple(texts, { append = false } = {}) {
const t0 = performance.now();
- findings = [];
- accessMap = [];
- rawData = null;
+ const nextFindings = append ? findings.slice() : [];
+ const nextAccessMap = append ? accessMap.slice() : [];
+ let nextRawData = append ? rawData : null;
for (let i = 0; i < texts.length; i++) {
const text = texts[i];
if (!text) continue;
const { f, am, rd } = parseSingleText(text);
- findings.push(...f);
- accessMap.push(...am);
- rawData = mergeRawReportData(rawData, rd);
+ nextFindings.push(...f);
+ nextAccessMap.push(...am);
+ nextRawData = mergeRawReportData(nextRawData, rd);
}
// Deduplicate findings by fingerprint
- findings = deduplicateFindings(findings);
+ findings = deduplicateFindings(nextFindings);
// Deduplicate access map entries by fingerprint
- accessMap = deduplicateAccessMap(accessMap);
+ accessMap = deduplicateAccessMap(nextAccessMap);
+ rawData = nextRawData;
finalizeRender(t0);
}
@@ -2519,8 +2851,10 @@
currentPage = 1;
currentFilter = "";
validationFilter = "all";
+ sourceFilter = "all";
searchInput.value = "";
validationSelect.value = "all";
+ sourceSelect.value = "all";
extractScanMetadata();
setActiveView("view-dashboard");
@@ -2732,11 +3066,27 @@
document.getElementById("stat-identities").textContent = (accessMap || []).length.toString();
renderStatusChart(validationCounts);
+ renderSourceChart(calculateSourceCounts());
+ renderSourceDetectorCharts();
+ renderDetectorBreakdown();
+ }
+
+ function getFindingSourceTool(finding) {
+ const importInfo = finding && finding.viewer_import && typeof finding.viewer_import === "object"
+ ? finding.viewer_import
+ : null;
+ const sourceTool = importInfo && importInfo.source_tool
+ ? String(importInfo.source_tool).trim().toLowerCase()
+ : "";
+ if (sourceTool === "gitleaks") return "gitleaks";
+ if (sourceTool === "trufflehog") return "trufflehog";
+ return "kingfisher";
}
function getFilteredSortedFindings() {
const filterLower = currentFilter.toLowerCase();
const validation = validationFilter;
+ const source = sourceFilter;
let arr = findings.filter((f) => {
const rule = f.rule || {};
@@ -2745,6 +3095,7 @@
const path = (finding.path || "").toLowerCase();
const snippet = (finding.snippet || "").toLowerCase();
const fingerprint = (finding.fingerprint || "").toLowerCase();
+ const sourceTool = getFindingSourceTool(finding);
const status = (finding.validation && finding.validation.status
? String(finding.validation.status)
: "").toLowerCase();
@@ -2757,6 +3108,10 @@
const normalizedStatus = normalizeValidationStatus(status);
+ if (source !== "all" && sourceTool !== source) {
+ return false;
+ }
+
if (validation === "active") {
return normalizedStatus === "active";
} else if (validation === "inactive") {
@@ -2877,37 +3232,219 @@
return counts;
}
- function renderStatusChart(counts) {
- if (!statusChartCanvas) return;
- const ctx = statusChartCanvas.getContext("2d");
+ function calculateSourceCounts(list = findings) {
+ const counts = { kingfisher: 0, gitleaks: 0, trufflehog: 0 };
+ (list || []).forEach((f) => {
+ const finding = f && f.finding ? f.finding : {};
+ const source = getFindingSourceTool(finding);
+ counts[source] = (counts[source] || 0) + 1;
+ });
+ return counts;
+ }
+
+ function normalizeRuleFamilyId(ruleId) {
+ const normalized = String(ruleId || "").trim();
+ if (!normalized) return "";
+ return normalized.replace(/\.\d+$/, "").replace(/^kingfisher\./, "");
+ }
+
+ function getFindingDetectorLabel(entry) {
+ const rule = entry && entry.rule ? entry.rule : {};
+ const finding = entry && entry.finding ? entry.finding : {};
+ const source = getFindingSourceTool(finding);
+
+ if (source === "kingfisher") {
+ return normalizeRuleFamilyId(rule.id) || String(rule.name || rule.id || "Unknown");
+ }
+
+ const importInfo = finding.viewer_import && typeof finding.viewer_import === "object"
+ ? finding.viewer_import
+ : null;
+ return String(
+ (importInfo && (importInfo.source_rule_name || importInfo.source_rule_id)) ||
+ rule.name ||
+ rule.id ||
+ "Unknown"
+ );
+ }
+
+ function getDetectorBreakdownEntries(list = findings) {
+ const grouped = new Map();
+ const workingList = detectorBreakdownActiveOnly
+ ? (list || []).filter((entry) => {
+ const finding = entry && entry.finding ? entry.finding : {};
+ const normalizedStatus = normalizeValidationStatus(
+ finding.validation && finding.validation.status ? finding.validation.status : ""
+ );
+ return normalizedStatus === "active";
+ })
+ : (list || []);
+
+ workingList.forEach((entry) => {
+ const finding = entry && entry.finding ? entry.finding : {};
+ const source = getFindingSourceTool(finding);
+ const normalizedStatus = normalizeValidationStatus(
+ finding.validation && finding.validation.status ? finding.validation.status : ""
+ );
+ const label = getFindingDetectorLabel(entry);
+
+ const key = source + "::" + label;
+ if (!grouped.has(key)) {
+ grouped.set(key, {
+ key,
+ source,
+ label,
+ count: 0,
+ active: 0,
+ });
+ }
+ const current = grouped.get(key);
+ current.count += 1;
+ if (normalizedStatus === "active") current.active += 1;
+ });
+
+ const entries = Array.from(grouped.values());
+ const dirFactor = detectorBreakdownSortDirection === "asc" ? 1 : -1;
+ entries.sort((a, b) => {
+ let va;
+ let vb;
+ switch (detectorBreakdownSortField) {
+ case "label":
+ va = (a.label || "").toLowerCase();
+ vb = (b.label || "").toLowerCase();
+ break;
+ case "source":
+ va = getSourceDisplayName(a.source).toLowerCase();
+ vb = getSourceDisplayName(b.source).toLowerCase();
+ break;
+ case "active":
+ va = Number(a.active || 0);
+ vb = Number(b.active || 0);
+ break;
+ case "count":
+ default:
+ va = Number(a.count || 0);
+ vb = Number(b.count || 0);
+ break;
+ }
+
+ if (typeof va === "number" && typeof vb === "number") {
+ if (va < vb) return -1 * dirFactor;
+ if (va > vb) return 1 * dirFactor;
+ return a.label.localeCompare(b.label);
+ }
+
+ if (va < vb) return -1 * dirFactor;
+ if (va > vb) return 1 * dirFactor;
+ return Number(b.count || 0) - Number(a.count || 0);
+ });
+
+ return entries;
+ }
+
+ function getChartColor(index) {
+ const palette = [
+ "#0ea5e9",
+ "#8b5cf6",
+ "#22c55e",
+ "#f97316",
+ "#ec4899",
+ "#eab308",
+ "#14b8a6",
+ "#ef4444",
+ ];
+ return palette[index % palette.length];
+ }
+
+ function getSourceDetectorChartEntries(list = findings) {
+ const groupedBySource = new Map();
+ (list || []).forEach((entry) => {
+ const finding = entry && entry.finding ? entry.finding : {};
+ const source = getFindingSourceTool(finding);
+ const label = getFindingDetectorLabel(entry);
+ if (!groupedBySource.has(source)) groupedBySource.set(source, new Map());
+ const detectorCounts = groupedBySource.get(source);
+ detectorCounts.set(label, (detectorCounts.get(label) || 0) + 1);
+ });
+
+ return ["kingfisher", "gitleaks", "trufflehog"]
+ .filter((source) => groupedBySource.has(source))
+ .map((source) => {
+ const countsMap = groupedBySource.get(source);
+ const sortedEntries = Array.from(countsMap.entries())
+ .map(([label, count]) => ({ label, count }))
+ .sort((a, b) => {
+ if (b.count !== a.count) return b.count - a.count;
+ return a.label.localeCompare(b.label);
+ });
+
+ const topEntries = sortedEntries.slice(0, 6);
+ const otherCount = sortedEntries.slice(6).reduce((sum, entry) => sum + entry.count, 0);
+ if (otherCount > 0) topEntries.push({ label: "Other", count: otherCount });
+
+ const dataPoints = topEntries.map((entry, index) => ({
+ key: entry.label,
+ label: entry.label,
+ color: entry.label === "Other" ? "#94a3b8" : getChartColor(index),
+ }));
+ const counts = {};
+ topEntries.forEach((entry) => {
+ counts[entry.label] = entry.count;
+ });
+
+ return {
+ source,
+ sourceLabel: getSourceDisplayName(source),
+ detectorCount: sortedEntries.length,
+ counts,
+ dataPoints,
+ };
+ });
+ }
+
+ function toggleDetectorBreakdownSort(field) {
+ if (detectorBreakdownSortField === field) {
+ detectorBreakdownSortDirection = detectorBreakdownSortDirection === "asc" ? "desc" : "asc";
+ } else {
+ detectorBreakdownSortField = field;
+ detectorBreakdownSortDirection = field === "label" || field === "source" ? "asc" : "desc";
+ }
+ detectorBreakdownPage = 1;
+ renderDetectorBreakdown();
+ }
+
+ function getDetectorBreakdownSortIndicator(field) {
+ if (detectorBreakdownSortField !== field) return "";
+ return detectorBreakdownSortDirection === "asc" ? "β²" : "βΌ";
+ }
+
+ function getSourceBadgeClass(source) {
+ if (source === "gitleaks") return "badge-github";
+ if (source === "trufflehog") return "badge-gcp";
+ return "badge-aws";
+ }
+
+ function getSourceDisplayName(source) {
+ if (source === "gitleaks") return "Gitleaks";
+ if (source === "trufflehog") return "TruffleHog";
+ return "Kingfisher";
+ }
+
+ function renderDonutChart(canvas, legendEl, dataPoints, counts, emptyLabel) {
+ if (!canvas) return;
+ const ctx = canvas.getContext("2d");
if (!ctx) return;
- const palette = {
- active: "#22c55e",
- inactive: "#f97316",
- not_attempted: "#38bdf8",
- canary: "#a855f7",
- unknown: "#9ca3af",
- };
-
- const dataPoints = [
- { key: "active", label: "Active", color: palette.active },
- { key: "inactive", label: "Inactive", color: palette.inactive },
- { key: "not_attempted", label: "Not Attempted", color: palette.not_attempted },
- { key: "canary", label: "Canary Token", color: palette.canary },
- { key: "unknown", label: "Unknown", color: palette.unknown },
- ];
-
const total = dataPoints.reduce((sum, entry) => sum + (counts[entry.key] || 0), 0);
- ctx.clearRect(0, 0, statusChartCanvas.width, statusChartCanvas.height);
+ ctx.clearRect(0, 0, canvas.width, canvas.height);
const style = getComputedStyle(document.documentElement);
const surfaceColor = style.getPropertyValue("--surface") || "#0d1424";
const textColor = style.getPropertyValue("--text-main") || "#e5e7eb";
- const radius = Math.min(statusChartCanvas.width, statusChartCanvas.height) / 2 - 10;
- const centerX = statusChartCanvas.width / 2;
- const centerY = statusChartCanvas.height / 2;
+ const radius = Math.min(canvas.width, canvas.height) / 2 - 10;
+ const centerX = canvas.width / 2;
+ const centerY = canvas.height / 2;
ctx.save();
ctx.fillStyle = surfaceColor;
@@ -2921,7 +3458,7 @@
ctx.font = "600 14px -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif";
ctx.textAlign = "center";
ctx.textBaseline = "middle";
- ctx.fillText("No validation data yet", centerX, centerY);
+ ctx.fillText(emptyLabel, centerX, centerY);
} else {
let startAngle = -Math.PI / 2;
dataPoints.forEach((entry) => {
@@ -2954,9 +3491,10 @@
ctx.fillText(total + " findings", centerX, centerY);
ctx.restore();
- if (statusLegend) {
- statusLegend.innerHTML = "";
+ if (legendEl) {
+ legendEl.innerHTML = "";
dataPoints.forEach((entry) => {
+ if ((counts[entry.key] || 0) === 0) return;
const row = document.createElement("div");
row.className = "chart-legend-item";
const swatch = document.createElement("span");
@@ -2966,11 +3504,146 @@
text.textContent = `${entry.label}: ${counts[entry.key] || 0}`;
row.appendChild(swatch);
row.appendChild(text);
- statusLegend.appendChild(row);
+ legendEl.appendChild(row);
});
}
}
+ function renderStatusChart(counts) {
+ const palette = {
+ active: "#22c55e",
+ inactive: "#f97316",
+ not_attempted: "#38bdf8",
+ canary: "#a855f7",
+ unknown: "#9ca3af",
+ };
+ const dataPoints = [
+ { key: "active", label: "Active", color: palette.active },
+ { key: "inactive", label: "Inactive", color: palette.inactive },
+ { key: "not_attempted", label: "Not Attempted", color: palette.not_attempted },
+ { key: "canary", label: "Canary Token", color: palette.canary },
+ { key: "unknown", label: "Unknown", color: palette.unknown },
+ ];
+ renderDonutChart(statusChartCanvas, statusLegend, dataPoints, counts, "No validation data yet");
+ }
+
+ function renderSourceChart(counts) {
+ const dataPoints = [
+ { key: "kingfisher", label: "Kingfisher", color: "#0e7c56" },
+ { key: "gitleaks", label: "Gitleaks", color: "#2563eb" },
+ { key: "trufflehog", label: "TruffleHog", color: "#7c3aed" },
+ ];
+ const loadedSources = dataPoints.filter((entry) => (counts[entry.key] || 0) > 0);
+ renderDonutChart(
+ sourceChartCanvas,
+ sourceLegend,
+ loadedSources,
+ counts,
+ "No source data yet",
+ );
+ }
+
+ function renderSourceDetectorCharts() {
+ if (!sourceDetectorCharts) return;
+ const chartEntries = getSourceDetectorChartEntries(findings);
+ if (!chartEntries.length) {
+ sourceDetectorCharts.innerHTML = 'No findings loaded yet.
';
+ return;
+ }
+
+ sourceDetectorCharts.innerHTML = "";
+ chartEntries.forEach((entry, index) => {
+ const card = document.createElement("div");
+ card.className = "chart-card";
+
+ const canvas = document.createElement("canvas");
+ canvas.width = 260;
+ canvas.height = 220;
+ canvas.id = `source-detector-chart-${index}`;
+
+ const legendWrap = document.createElement("div");
+ legendWrap.className = "chart-legend";
+
+ const heading = document.createElement("div");
+ heading.style.marginBottom = "10px";
+
+ const titleRow = document.createElement("div");
+ titleRow.className = "chart-legend-item";
+ titleRow.style.fontWeight = "700";
+ titleRow.textContent = `${entry.sourceLabel} detector mix`;
+
+ const subtitleRow = document.createElement("div");
+ subtitleRow.className = "chart-legend-item";
+ subtitleRow.style.color = "var(--text-muted)";
+ subtitleRow.textContent = `${entry.detectorCount} detector families${entry.detectorCount > 6 ? ", top 6 plus Other" : ""}`;
+
+ heading.appendChild(titleRow);
+ heading.appendChild(subtitleRow);
+ legendWrap.appendChild(heading);
+
+ const legend = document.createElement("div");
+ legend.className = "chart-legend";
+ card.appendChild(canvas);
+ legendWrap.appendChild(legend);
+ card.appendChild(legendWrap);
+ sourceDetectorCharts.appendChild(card);
+
+ renderDonutChart(canvas, legend, entry.dataPoints, entry.counts, "No detector data yet");
+ });
+ }
+
+ function renderDetectorBreakdown() {
+ if (!detectorBreakdown) return;
+ const entries = getDetectorBreakdownEntries(findings);
+ const pageSize = 25;
+ const totalPages = Math.max(1, Math.ceil(entries.length / pageSize));
+ detectorBreakdownPage = Math.min(Math.max(1, detectorBreakdownPage), totalPages);
+
+ if (detectorBreakdownPrev) detectorBreakdownPrev.disabled = detectorBreakdownPage <= 1;
+ if (detectorBreakdownNext) detectorBreakdownNext.disabled = detectorBreakdownPage >= totalPages;
+
+ if (!entries.length) {
+ if (detectorBreakdownPageInfo) detectorBreakdownPageInfo.textContent = "0 of 0";
+ detectorBreakdown.innerHTML = `${
+ detectorBreakdownActiveOnly ? "No active findings loaded yet." : "No findings loaded yet."
+ }
`;
+ return;
+ }
+
+ const pageStart = (detectorBreakdownPage - 1) * pageSize;
+ const pageEntries = entries.slice(pageStart, pageStart + pageSize);
+ const rangeStart = pageStart + 1;
+ const rangeEnd = Math.min(pageStart + pageEntries.length, entries.length);
+ if (detectorBreakdownPageInfo) {
+ detectorBreakdownPageInfo.textContent = `${rangeStart}-${rangeEnd} of ${entries.length}`;
+ }
+
+ const rows = pageEntries.map((entry) => `
+
+
+ ${escapeHtml(entry.label)}
+
+ ${escapeHtml(getSourceDisplayName(entry.source))}
+ ${entry.count}
+ ${entry.active}
+
+ `).join("");
+
+ detectorBreakdown.innerHTML = `
+
+
+
+ Detector ${getDetectorBreakdownSortIndicator("label")}
+ Source ${getDetectorBreakdownSortIndicator("source")}
+ ${detectorBreakdownActiveOnly ? "Active findings" : "Findings"} ${getDetectorBreakdownSortIndicator("count")}
+ Active ${getDetectorBreakdownSortIndicator("active")}
+
+
+ ${rows}
+
+ `;
+ }
+
function triggerDownload(filename, content, type) {
const blob = new Blob([content], { type });
const url = URL.createObjectURL(blob);
diff --git a/src/findings_store.rs b/src/findings_store.rs
index 57070f0..9d3417a 100644
--- a/src/findings_store.rs
+++ b/src/findings_store.rs
@@ -44,14 +44,67 @@ fn origin_fp(os: &OriginSet) -> u64 {
h.finish()
}
+fn dedup_origin_kind(origin: &OriginSet) -> &'static str {
+ if origin.iter().any(|o| matches!(o, Origin::Extended(_))) {
+ "ext"
+ } else {
+ "file_git"
+ }
+}
+
+const DEDUP_BLOOM_FP_RATE: f64 = 0.001;
+const INITIAL_BLOOM_CAPACITY: usize = 5_000_000;
+const MAX_BLOOM_CAPACITY: usize = 10_000_000;
+
+struct DedupBloomSet {
+ filters: Vec>,
+ active_items: usize,
+ active_capacity: usize,
+}
+
+impl DedupBloomSet {
+ fn new() -> Self {
+ Self::with_capacity(INITIAL_BLOOM_CAPACITY)
+ }
+
+ fn with_capacity(initial_capacity: usize) -> Self {
+ let capacity = initial_capacity.max(1);
+ let first = Bloom::new_for_fp_rate(capacity, DEDUP_BLOOM_FP_RATE)
+ .expect("Bloom filter size params are valid");
+ Self { filters: vec![first], active_items: 0, active_capacity: capacity }
+ }
+
+ fn contains_or_insert(&mut self, key: u64) -> bool {
+ if self.filters.iter().any(|filter| filter.check(&key)) {
+ return true;
+ }
+
+ if self.active_items >= self.active_capacity {
+ self.grow();
+ }
+
+ let active = self.filters.last_mut().expect("at least one Bloom filter exists");
+ active.set(&key);
+ self.active_items += 1;
+ false
+ }
+
+ fn grow(&mut self) {
+ self.active_capacity = std::cmp::min(self.active_capacity * 2, MAX_BLOOM_CAPACITY);
+ let next = Bloom::new_for_fp_rate(self.active_capacity, DEDUP_BLOOM_FP_RATE)
+ .expect("Bloom filter size params are valid");
+ self.filters.push(next);
+ self.active_items = 0;
+ }
+}
+
pub struct FindingsStore {
rules: Vec>,
matches: Vec>,
index_map: FxHashMap<(BlobId, OffsetSpan), usize>,
blobs: FxHashSet,
clone_dir: PathBuf,
- seen_bloom: Bloom,
- bloom_items: usize,
+ dedup_filter: DedupBloomSet,
dependent_rule_ids: FxHashSet,
blob_meta: FxHashMap>,
origin_meta: FxHashMap>,
@@ -66,10 +119,6 @@ pub struct FindingsStore {
impl FindingsStore {
pub fn new(clone_dir: PathBuf) -> Self {
- let expected_items = 10_000_000; // tune to your largest scan
- let fp_rate = 0.001; // 0.1 % false-positive rate
- let seen_bloom = Bloom::new_for_fp_rate(expected_items, fp_rate)
- .expect("Bloom filter size params are valid");
Self {
rules: Vec::new(),
matches: Vec::new(),
@@ -78,8 +127,7 @@ impl FindingsStore {
blob_meta: FxHashMap::default(),
origin_meta: FxHashMap::default(),
clone_dir,
- seen_bloom,
- bloom_items: 0,
+ dedup_filter: DedupBloomSet::new(),
dependent_rule_ids: FxHashSet::default(),
docker_images: FxHashMap::default(),
slack_links: FxHashMap::default(),
@@ -187,11 +235,7 @@ impl FindingsStore {
.or_else(|| m.groups.captures.get(0).map(|c| c.raw_value()))
.unwrap_or("");
- let origin_kind = match origin.first() {
- Origin::GitRepo(_) => "git",
- Origin::File(_) => "file",
- Origin::Extended(_) => "ext",
- };
+ let origin_kind = dedup_origin_kind(&origin);
let rule_id = m.rule.id().to_uppercase();
let key_string = if self.dependent_rule_ids.contains(&rule_id) {
@@ -201,11 +245,9 @@ impl FindingsStore {
};
let key = xxh3_64(key_string.as_bytes());
- if self.seen_bloom.check(&key) {
+ if self.dedup_filter.contains_or_insert(key) {
continue; // very likely a duplicate
}
- self.seen_bloom.set(&key);
- self.bloom_items += 1;
}
/*ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
@@ -235,13 +277,6 @@ impl FindingsStore {
self.index_map.insert((blob_id, offset_span), idx);
}
- /* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
- // Periodically rebuild Bloom filter to bound the FP rate
- if dedup && self.bloom_items > 5_000_000 {
- self.seen_bloom = Bloom::new_for_fp_rate(5_000_000, 0.001).unwrap();
- self.bloom_items = 0;
- }
-
added
}
@@ -473,3 +508,21 @@ impl FindingsStore {
self.matches.chunks(chunk_size).map(|slice| slice.to_vec()) // keep Arc pointers
}
}
+
+#[cfg(test)]
+mod tests {
+ use super::DedupBloomSet;
+
+ #[test]
+ fn dedup_filter_remains_monotonic_across_growth() {
+ let mut filter = DedupBloomSet::with_capacity(2);
+
+ assert!(!filter.contains_or_insert(11));
+ assert!(!filter.contains_or_insert(22));
+ assert!(!filter.contains_or_insert(33));
+
+ assert!(filter.contains_or_insert(11));
+ assert!(filter.contains_or_insert(22));
+ assert!(filter.contains_or_insert(33));
+ }
+}
diff --git a/src/main.rs b/src/main.rs
index 35906aa..5b7a4e1 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -108,7 +108,7 @@ fn main() -> anyhow::Result<()> {
// Run the real entry point on a thread with an explicit, larger stack so that
// deeply-nested async state machines (validation pipeline) cannot overflow the
// default main-thread stack.
- const STACK_SIZE: usize = 64 * 1024 * 1024; // 64 MiB
+ const STACK_SIZE: usize = 32 * 1024 * 1024; // 32 MiB
let builder =
std::thread::Builder::new().name("kingfisher-main".to_string()).stack_size(STACK_SIZE);
@@ -146,12 +146,12 @@ fn run() -> anyhow::Result<()> {
};
// Set up the Tokio runtime with the specified number of threads.
- // Worker threads also need larger stacks because timed_validate_single_match
- // compiles to an async state machine whose poll function has a very large
- // stack frame (known LLVM limitation with big async fns).
+ // Worker threads need larger stacks because async state machines (validation
+ // pipeline) can produce large poll stack frames. 8 MiB is sufficient now that
+ // the validators are split into separate async fns.
let runtime = Builder::new_multi_thread()
.worker_threads(num_jobs)
- .thread_stack_size(16 * 1024 * 1024) // 16 MiB per worker
+ .thread_stack_size(8 * 1024 * 1024) // 8 MiB per worker
.enable_all()
.build()
.context("Failed to create Tokio runtime")?;
diff --git a/src/scanner/validation.rs b/src/scanner/validation.rs
index b9f2eb8..f6459db 100644
--- a/src/scanner/validation.rs
+++ b/src/scanner/validation.rs
@@ -13,7 +13,7 @@ use futures::{stream, FutureExt, StreamExt};
use indicatif::{ProgressBar, ProgressStyle};
use liquid::Parser;
use reqwest::StatusCode;
-use rustc_hash::FxHashMap;
+use rustc_hash::{FxHashMap, FxHashSet};
use tokio::{sync::Notify, time::timeout};
use tracing::trace;
@@ -22,7 +22,7 @@ use crate::{
blob::BlobId,
findings_store::{FindingsStore, FindingsStoreMessage},
location::OffsetSpan,
- matcher::{Match, OwnedBlobMatch},
+ matcher::OwnedBlobMatch,
rules::rule::Validation,
validation::{
collect_variables_and_dependencies, utils, validate_single_match, CachedResponse,
@@ -421,40 +421,44 @@ pub async fn run_secret_validation(
let success_count = Arc::new(AtomicUsize::new(0));
let fail_count = Arc::new(AtomicUsize::new(0));
- // ββ 2. Fetch rules + matches ββββββββββββββββββββββββββββββββββββββββββββ
- let (_all_rules, all_matches_by_blob) = {
+ // ββ 2. Fetch matches & partition ββββββββββββββββββββββββββββββββββββββ
+ // β’ simple_matches: Vec of Arcs for rules without dependencies
+ // β’ dependent_blob_ids: just the blob IDs β we re-fetch in Phase 2
+ // so we don't hold two full copies of the match set simultaneously
+ let (simple_matches, dependent_blob_ids) = {
let ds = datastore.lock().unwrap();
- let rules = ds.get_rules()?;
- let mut map: FxHashMap>> = FxHashMap::default();
let matches = if let Some(r) = range.clone() {
ds.get_matches()[r].to_vec()
} else {
ds.get_matches().to_vec()
};
- for arc_msg in matches.into_iter() {
- map.entry(arc_msg.1.id).or_default().push(arc_msg);
+ let mut by_blob: FxHashMap>> = FxHashMap::default();
+ for arc_msg in matches {
+ by_blob.entry(arc_msg.1.id).or_default().push(arc_msg);
}
- (rules, map)
+
+ let mut simple = Vec::new();
+ let mut dep_ids = FxHashSet::default();
+ for (blob_id, blob_matches) in by_blob {
+ if blob_matches.iter().any(|m| !m.2.rule.syntax().depends_on_rule.is_empty()) {
+ dep_ids.insert(blob_id);
+ // Arcs dropped here β not held during Phase 1
+ } else {
+ simple.extend(blob_matches);
+ }
+ }
+ (simple, dep_ids)
};
- // ββ 3. Partition blobs ββββββββββββββββββββββββββββββββββββββββββββββββββ
- let mut simple_matches = Vec::new();
- let mut dependent_blobs = FxHashMap::default(); // blob_id -- Vec>
- for (blob_id, matches) in all_matches_by_blob {
- if matches.iter().any(|m| !m.2.rule.syntax().depends_on_rule.is_empty()) {
- dependent_blobs.insert(blob_id, matches);
- } else {
- simple_matches.extend(matches);
- }
- }
-
- // Result accumulator
- let mut updated_arcs: Vec> = Vec::new();
-
// ββ Phase 1: simple, global de-dupe ββββββββββββββββββββββββββββββββββββββ
if !simple_matches.is_empty() {
- let mut groups: FxHashMap>> = FxHashMap::default();
+ // Keep only ONE representative per (rule_id, secret) group.
+ // Previous code stored ALL matches per group β holding thousands of
+ // Arc clones alive for the entire duration of the concurrent stream.
+ let total_simple = simple_matches.len();
+ let mut representatives: FxHashMap> =
+ FxHashMap::default();
for arc_msg in simple_matches {
// VALIDATION DEDUP: Use get(0) to get the first/primary capture for grouping.
//
@@ -476,18 +480,19 @@ pub async fn run_secret_validation(
validation_group_key = %group_key,
"Grouping finding for validation"
);
- groups.entry(group_key).or_default().push(arc_msg);
+ // Only keep the first representative β extra Arcs are dropped immediately
+ representatives.entry(group_key).or_insert(arc_msg);
}
trace!(
- total_findings = groups.values().map(|v| v.len()).sum::(),
- unique_validation_groups = groups.len(),
+ total_findings = total_simple,
+ unique_validation_groups = representatives.len(),
"Validation grouping complete (internal dedup)"
);
let validation_results = DashMap::::new();
- let pb = ProgressBar::new(groups.len() as u64).with_message("Validating secretsβ¦");
+ let pb = ProgressBar::new(representatives.len() as u64).with_message("Validating secretsβ¦");
pb.set_style(
ProgressStyle::with_template(
"{spinner:.green} {msg} [{bar:40.green/blue}] {pos}/{len} ({percent}%) \
@@ -498,21 +503,29 @@ pub async fn run_secret_validation(
);
pb.enable_steady_tick(Duration::from_millis(100));
+ // Shared empty maps β avoids allocating throwaway DashMaps per task
+ let empty_dep_vars: FxHashMap> = FxHashMap::default();
+ let empty_missing: FxHashMap> = FxHashMap::default();
+ let empty_cache: Arc> = Arc::new(DashMap::new());
+ let empty_inflight: Arc> = Arc::new(DashMap::new());
+
stream::iter(
- groups.values().map(|v| v[0].clone()), // one representative
+ representatives.into_values(), // consumes map, dropping keys
)
.for_each_concurrent(concurrency, |rep_arc| {
- // clones into task
let parser = parser.clone();
let clients = clients.clone();
let cache_glob = cache.clone();
let val_res = &validation_results;
let success = success_count.clone();
let fail = fail_count.clone();
- // *** FIX: Clone the progress bar for each concurrent task ***
let pb = pb.clone();
let access_map = access_map.clone();
let rate_limiter = rate_limiter.clone();
+ let empty_dep_vars = &empty_dep_vars;
+ let empty_missing = &empty_missing;
+ let empty_cache = empty_cache.clone();
+ let empty_inflight = empty_inflight.clone();
async move {
// VALIDATION DEDUP: Use get(0) for the primary secret value.
@@ -523,7 +536,6 @@ pub async fn run_secret_validation(
match val_res.entry(key.clone()) {
dashmap::mapref::entry::Entry::Occupied(_) => return,
dashmap::mapref::entry::Entry::Vacant(entry) => {
- // *** FIX: Corrected placeholder to match struct definition ***
entry.insert(CachedResponse {
body: validation_body::from_string(String::new()),
status: StatusCode::ACCEPTED,
@@ -542,10 +554,10 @@ pub async fn run_secret_validation(
&mut om,
&parser,
&clients,
- &FxHashMap::default(),
- &FxHashMap::default(),
- &Arc::new(DashMap::new()),
- &Arc::new(DashMap::new()),
+ empty_dep_vars,
+ empty_missing,
+ &empty_cache,
+ &empty_inflight,
&success,
&fail,
&cache_glob,
@@ -565,35 +577,59 @@ pub async fn run_secret_validation(
};
val_res.insert(key, cr);
- // Now we use the cloned `pb`
pb.inc(1);
}
.boxed()
})
.await;
- // This is now valid because the original `pb` was never moved
pb.finish();
- for (key, group) in groups {
- let cr = validation_results.get(&key).expect("missing cached result");
- for arc_msg in group {
- let (origin, blob_md, old_match) = &*arc_msg;
- updated_arcs.push(Arc::new((
- origin.clone(),
- blob_md.clone(),
- Match {
- validation_success: cr.is_valid,
- validation_response_status: cr.status.as_u16(),
- validation_response_body: cr.body.clone(),
- ..old_match.clone()
- },
- )));
+ // Apply Phase 1 results in-place β avoids cloning every Match
+ {
+ let mut ds = datastore.lock().unwrap();
+ let matches = ds.get_matches_mut();
+ let slice: &mut [Arc] = if let Some(ref r) = range {
+ &mut matches[r.clone()]
+ } else {
+ matches.as_mut_slice()
+ };
+ for match_arc in slice.iter_mut() {
+ // Skip dependent matches β handled in Phase 2
+ if !match_arc.2.rule.syntax().depends_on_rule.is_empty() {
+ continue;
+ }
+ let secret = match_arc.2.groups.captures.get(0).map_or("", |c| c.raw_value());
+ let key = format!("{}|{}", match_arc.2.rule.id(), secret);
+ if let Some(cr) = validation_results.get(&key) {
+ let (_, _, existing) = Arc::make_mut(match_arc);
+ existing.validation_success = cr.is_valid;
+ existing.validation_response_status = cr.status.as_u16();
+ existing.validation_response_body = cr.body.clone();
+ }
}
}
}
- // ββ Phase 2: blobs with dependencies (original logic) βββββββββββββββββββ
- if !dependent_blobs.is_empty() {
+ // ββ Phase 2: blobs with dependencies βββββββββββββββββββββββββββββββββββββ
+ // Re-fetch dependent matches from the datastore so we don't hold two
+ // copies of the full match set in memory simultaneously.
+ if !dependent_blob_ids.is_empty() {
+ let dependent_blobs: FxHashMap>> = {
+ let ds = datastore.lock().unwrap();
+ let slice = if let Some(ref r) = range {
+ &ds.get_matches()[r.clone()]
+ } else {
+ ds.get_matches()
+ };
+ let mut map: FxHashMap>> = FxHashMap::default();
+ for arc_msg in slice {
+ if dependent_blob_ids.contains(&arc_msg.1.id) {
+ map.entry(arc_msg.1.id).or_default().push(arc_msg.clone());
+ }
+ }
+ map
+ };
+
let blob_ids: Vec<_> = {
let mut v: Vec<_> = dependent_blobs.keys().cloned().collect();
v.sort_unstable();
@@ -615,10 +651,21 @@ pub async fn run_secret_validation(
let val_cache = Arc::new(DashMap::::new());
let in_flight = Arc::new(DashMap::::new());
+ // Collect validation results keyed by finding_fingerprint:
+ // (validation_success, response_body, response_status_u16, dependent_captures)
+ type DepUpdate = (
+ bool,
+ crate::validation_body::ValidationResponseBody,
+ u16,
+ std::collections::BTreeMap,
+ );
+ let mut dep_updates: FxHashMap = FxHashMap::default();
+
for chunk in blob_ids.chunks(chunk_size) {
- let tasks: Vec<_> = chunk
- .iter()
- .map(|blob_id| {
+ // Lazy iterator β futures are created on-demand by buffer_unordered,
+ // not all at once via .collect().
+ let validated_blobs: Vec> =
+ stream::iter(chunk.iter().map(|blob_id| {
let matches_for_blob = dependent_blobs.get(blob_id).unwrap().clone();
let parser = parser.clone();
let clients = clients.clone();
@@ -643,6 +690,9 @@ pub async fn run_secret_validation(
})
.collect::>();
+ // Drop Arc clones early β we only need OwnedBlobMatch from here
+ drop(matches_for_blob);
+
let (dep_vars, missing_deps) = collect_variables_and_dependencies(&owned);
let mut by_key: FxHashMap> =
@@ -705,52 +755,56 @@ pub async fn run_secret_validation(
validated.into_iter().flatten().collect::>()
}
.boxed()
- })
- .collect();
-
- let validated_blobs: Vec> =
- stream::iter(tasks).buffer_unordered(concurrency).collect().await;
+ }))
+ .buffer_unordered(concurrency)
+ .collect()
+ .await;
for blob_vec in validated_blobs {
- if blob_vec.is_empty() {
- continue;
- }
-
- let map_original: FxHashMap = dependent_blobs
- .get(&blob_vec[0].blob_id)
- .unwrap()
- .iter()
- .map(|arc_msg| (arc_msg.2.finding_fingerprint, arc_msg.clone()))
- .collect();
-
for om in blob_vec {
- let orig = map_original.get(&om.finding_fingerprint).unwrap();
-
- updated_arcs.push(Arc::new((
- orig.0.clone(),
- orig.1.clone(),
- Match {
- validation_success: om.validation_success,
- validation_response_body: om.validation_response_body.clone(),
- validation_response_status: om.validation_response_status.as_u16(),
- // Copy dependent_captures from validated OwnedBlobMatch
- // so they're available for building validate/revoke commands
- dependent_captures: om.dependent_captures.clone(),
- ..orig.2.clone()
- },
- )));
+ dep_updates.insert(
+ om.finding_fingerprint,
+ (
+ om.validation_success,
+ om.validation_response_body.clone(),
+ om.validation_response_status.as_u16(),
+ om.dependent_captures.clone(),
+ ),
+ );
}
}
pb.inc(chunk.len() as u64);
}
pb.finish();
+
+ // Drop dependent blob Arc clones so datastore Arcs reach refcount == 1
+ drop(dependent_blobs);
+
+ // Apply Phase 2 results in-place
+ if !dep_updates.is_empty() {
+ let mut ds = datastore.lock().unwrap();
+ let matches = ds.get_matches_mut();
+ let slice: &mut [Arc] = if let Some(ref r) = range {
+ &mut matches[r.clone()]
+ } else {
+ matches.as_mut_slice()
+ };
+ for match_arc in slice.iter_mut() {
+ if let Some((success, body, status, dep_caps)) =
+ dep_updates.remove(&match_arc.2.finding_fingerprint)
+ {
+ let (_, _, existing) = Arc::make_mut(match_arc);
+ existing.validation_success = success;
+ existing.validation_response_status = status;
+ existing.validation_response_body = body;
+ existing.dependent_captures = dep_caps;
+ }
+ }
+ }
}
- // ββ 4. Persist all updates ββββββββββββββββββββββββββββββββββββββββββββββ
- {
- let mut ds = datastore.lock().unwrap();
- ds.replace_matches(updated_arcs);
- }
+ // Reclaim memory from static caches that accumulated during validation
+ crate::validation::clear_validation_caches();
Ok(())
}
diff --git a/src/validation.rs b/src/validation.rs
index 00f259d..46ec4ce 100644
--- a/src/validation.rs
+++ b/src/validation.rs
@@ -302,6 +302,18 @@ pub fn init_validation_caches() {
aws::set_aws_validation_concurrency(15);
}
+/// Clear the static validation caches to reclaim memory after validation completes.
+pub fn clear_validation_caches() {
+ if let Some(c) = VALIDATION_CACHE.get() {
+ c.clear();
+ c.shrink_to_fit();
+ }
+ if let Some(c) = IN_FLIGHT.get() {
+ c.clear();
+ c.shrink_to_fit();
+ }
+}
+
pub fn set_skip_aws_account_ids(ids: I)
where
I: IntoIterator- ,
@@ -600,928 +612,97 @@ async fn timed_validate_single_match<'a>(
m.dependent_captures.entry(k.to_uppercase()).or_insert_with(|| v.clone());
}
- let rule_syntax = m.rule.syntax();
-
- if let (Some(limiter), Some(validation)) = (rate_limiter, rule_syntax.validation.as_ref()) {
- if should_rate_limit_validation(validation) {
- limiter.wait_for_rule(m.rule.id()).await;
+ {
+ let rule_syntax = m.rule.syntax();
+ if let (Some(limiter), Some(validation)) = (rate_limiter, rule_syntax.validation.as_ref()) {
+ if should_rate_limit_validation(validation) {
+ limiter.wait_for_rule(m.rule.id()).await;
+ }
}
}
// ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
- // 4. validator switch
+ // 4. validator dispatch
+ //
+ // Each validator lives in its own async fn so LLVM compiles
+ // a separate, smaller poll function for each one. This
+ // prevents the combined stack frame from blowing the stack
+ // on large concurrent workloads.
+ //
+ // We clone the validation enum to release the immutable
+ // borrow on `m` before passing `m` mutably to each helper.
// ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
- match &rule_syntax.validation {
- // ---------------------------------------------------- HTTP validator
+ let rule_name = m.rule.syntax().name.clone();
+ let validation = m.rule.syntax().validation.clone();
+ let rule_tls_mode_for_raw = m.rule.syntax().tls_mode;
+
+ match &validation {
Some(Validation::Http(http_validation)) => {
- let request_timeout = validation_timeout;
- let multipart_timeout = validation_timeout;
- let max_retries: u32 = validation_retries;
- let request_globals = httpvalidation::with_request_template_globals(&globals);
- let cache_globals = httpvalidation::with_cache_key_template_globals(&globals);
- // render URL
- let url = match render_and_parse_url(
- parser,
- &request_globals,
- &rule_syntax.name,
- &http_validation.request.url,
- clients.allow_internal_ips,
- )
- .await
- {
- Ok(u) => u,
- Err(e) => {
- m.validation_success = false;
- m.validation_response_body = validation_body::from_string(e);
- m.validation_response_status = StatusCode::BAD_REQUEST;
- commit_and_return(m);
- return;
- }
- };
-
- // build request builder
- let request_builder = match httpvalidation::build_request_builder(
+ validate_http(
+ m,
+ http_validation,
client,
- &http_validation.request.method,
- &url,
- &http_validation.request.headers,
- &http_validation.request.body,
- request_timeout,
parser,
- &request_globals,
- ) {
- Ok(rb) => rb,
- Err(e) => {
- m.validation_success = false;
- m.validation_response_body = validation_body::from_string(e);
- m.validation_response_status = StatusCode::BAD_REQUEST;
- commit_and_return(m);
- return;
- }
- };
-
- let is_multipart = http_validation.request.multipart.is_some();
- let mut cache_key = String::new();
-
- // old per-request cache (optional)
- if !is_multipart {
- let cache_url = render_template(
- parser,
- &cache_globals,
- &rule_syntax.name,
- &http_validation.request.url,
- )
- .await
- .unwrap_or_else(|_| http_validation.request.url.clone());
-
- let rendered_headers = httpvalidation::process_headers(
- &http_validation.request.headers,
- parser,
- &cache_globals,
- &url,
- )
- .unwrap_or_default();
-
- let mut header_map = BTreeMap::new();
- for (name, value) in rendered_headers.iter() {
- if let Ok(v) = value.to_str() {
- header_map.insert(name.as_str().to_string(), v.to_string());
- }
- }
-
- // Render the body template to include in cache key
- let rendered_body =
- http_validation.request.body.as_ref().and_then(|body_template| {
- parser
- .parse(body_template)
- .ok()
- .and_then(|template| template.render(&cache_globals).ok())
- });
-
- cache_key = httpvalidation::generate_http_cache_key_parts(
- http_validation.request.method.as_str(),
- &cache_url,
- &header_map,
- rendered_body.as_deref(),
- );
- if let Some(cached) = cache.get(&cache_key) {
- let c = cached.value();
- if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) {
- m.validation_success = c.is_valid;
- m.validation_response_body = c.body.clone();
- m.validation_response_status = c.status;
- commit_and_return(m);
- return;
- }
- }
- }
-
- // helper to execute single non-multipart request with retry
- let exec_single = |builder: reqwest::RequestBuilder| async {
- httpvalidation::retry_request(
- builder,
- max_retries,
- Duration::from_millis(500),
- Duration::from_secs(2),
- )
- .await
- };
-
- // run request (multipart vs non-multipart)
- let resp_res = if is_multipart {
- // build multipart request each retry
- let build_request = || async {
- let method = httpvalidation::parse_http_method(&http_validation.request.method)
- .unwrap_or(reqwest::Method::GET);
-
- let mut fresh_builder =
- client.request(method, url.clone()).timeout(multipart_timeout);
-
- if let Ok(mut headers) = httpvalidation::process_headers(
- &http_validation.request.headers,
- parser,
- &request_globals,
- &url,
- ) {
- // add realistic UA & accept headers
- let std_headers = [
- (header::USER_AGENT, GLOBAL_USER_AGENT.as_str()),
- (header::ACCEPT , "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"),
- (header::ACCEPT_LANGUAGE, "en-US,en;q=0.5"),
- (header::ACCEPT_ENCODING, "gzip, deflate, br"),
- (header::CONNECTION, "keep-alive"),
- ];
- for (hn, hv) in &std_headers {
- if let Ok(v) = HeaderValue::from_str(hv) {
- headers.insert(hn.clone(), v);
- }
- }
- fresh_builder = fresh_builder.headers(headers);
- }
-
- // build multipart form
- let mut form = multipart::Form::new();
- for part in http_validation.request.multipart.as_ref().unwrap().parts.iter() {
- match part.part_type.as_str() {
- "file" => {
- let path = render_template(
- parser,
- &request_globals,
- &rule_syntax.name,
- &part.content,
- )
- .await
- .unwrap_or_default();
- let bytes = fs::read(path).unwrap_or_default();
- let p = multipart::Part::bytes(bytes)
- .mime_str(
- part.content_type
- .as_deref()
- .unwrap_or("application/octet-stream"),
- )
- .unwrap_or_else(|_| multipart::Part::text("invalid"));
- form = form.part(part.name.clone(), p);
- }
- "text" => {
- let txt = render_template(
- parser,
- &request_globals,
- &rule_syntax.name,
- &part.content,
- )
- .await
- .unwrap_or_default();
- let p = multipart::Part::text(txt)
- .mime_str(part.content_type.as_deref().unwrap_or("text/plain"))
- .unwrap_or_else(|_| multipart::Part::text("invalid"));
- form = form.part(part.name.clone(), p);
- }
- _ => { /* ignore */ }
- }
- }
- fresh_builder.multipart(form)
- };
-
- httpvalidation::retry_multipart_request(
- build_request,
- max_retries as usize,
- Duration::from_millis(500),
- Duration::from_secs(2),
- )
- .await
- } else {
- exec_single(request_builder).await
- };
-
- // handle result
- match resp_res {
- Ok(resp) => {
- let status = resp.status();
- let headers = resp.headers().clone();
- let body = match resp.text().await {
- Ok(b) => b,
- Err(e) => {
- m.validation_success = false;
- m.validation_response_body = validation_body::from_string(format!(
- "Error reading response: {}",
- e
- ));
- m.validation_response_status = StatusCode::BAD_GATEWAY;
- commit_and_return(m);
- return;
- }
- };
- let display_body = if http_validation.request.response_is_html {
- utils::format_response_body_for_display(&body, max_body_len, true)
- } else {
- truncate_preview(&body, max_body_len)
- };
-
- m.validation_response_status = status;
- let body_opt = validation_body::from_string(display_body.clone());
- m.validation_response_body = body_opt.clone();
- let matchers = match http_validation.request.response_matcher.as_ref() {
- Some(m) => m,
- None => {
- m.validation_success = false;
- m.validation_response_body = validation_body::from_string(format!(
- "HTTP validation for rule '{}' is missing `response_matcher`",
- rule_syntax.name
- ));
- m.validation_response_status = StatusCode::BAD_REQUEST;
- commit_and_return(m);
- return;
- }
- };
-
- m.validation_success = httpvalidation::validate_response(
- matchers,
- &body,
- &status,
- &headers,
- http_validation.request.response_is_html,
- );
-
- // Avoid poisoning the cache with transient failures (rate limits, 5xx, etc).
- let cacheable_status = !(status.is_server_error()
- || status == StatusCode::TOO_MANY_REQUESTS
- || status == StatusCode::REQUEST_TIMEOUT);
- if !is_multipart && !cache_key.is_empty() && cacheable_status {
- cache.insert(
- cache_key,
- CachedResponse {
- body: body_opt,
- status,
- is_valid: m.validation_success,
- timestamp: Instant::now(),
- },
- );
- }
- }
- Err(e) => {
- m.validation_success = false;
- m.validation_response_body =
- validation_body::from_string(format!("HTTP error: {:?}", e));
- m.validation_response_status = StatusCode::BAD_GATEWAY;
- }
- }
- }
-
- // ---------------------------------------------------- gRPC validator
- Some(Validation::Grpc(grpc_validation_cfg)) => {
- let request_timeout = validation_timeout;
- let request_globals = httpvalidation::with_request_template_globals(&globals);
-
- // Render URL
- let url = match render_and_parse_url(
- parser,
- &request_globals,
- &rule_syntax.name,
- &grpc_validation_cfg.request.url,
+ &globals,
+ cache,
+ &rule_name,
clients.allow_internal_ips,
+ validation_timeout,
+ validation_retries,
+ max_body_len,
)
- .await
- {
- Ok(u) => u,
- Err(e) => {
- m.validation_success = false;
- m.validation_response_body = validation_body::from_string(e);
- m.validation_response_status = StatusCode::BAD_REQUEST;
- commit_and_return(m);
- return;
- }
- };
-
- // Execute gRPC unary call (HTTP/2 + trailers).
- let res = match grpc_validation::grpc_unary_call_from_rule(
- &url,
- &grpc_validation_cfg.request.headers,
- &grpc_validation_cfg.request.body,
+ .await;
+ }
+ Some(Validation::Grpc(grpc_validation_cfg)) => {
+ validate_grpc(
+ m,
+ grpc_validation_cfg,
parser,
- &request_globals,
- request_timeout,
+ &globals,
+ &rule_name,
+ clients.allow_internal_ips,
+ validation_timeout,
+ max_body_len,
)
- .await
- {
- Ok(r) => r,
- Err(e) => {
- m.validation_success = false;
- m.validation_response_body =
- validation_body::from_string(format!("gRPC error: {}", e));
- m.validation_response_status = StatusCode::BAD_GATEWAY;
- commit_and_return(m);
- return;
- }
- };
-
- let status = StatusCode::from_u16(res.http_status.as_u16()).unwrap_or(StatusCode::OK);
- let headers = res.headers;
- let mut body = String::from_utf8_lossy(&res.body_bytes).to_string();
-
- // gRPC errors are typically reported in trailers, not the body.
- // Surface them for debugging and for `--full-validation-response` output.
- let grpc_status =
- headers.get("grpc-status").and_then(|v| v.to_str().ok()).unwrap_or("").to_string();
- let grpc_message =
- headers.get("grpc-message").and_then(|v| v.to_str().ok()).unwrap_or("").to_string();
- // Avoid storing raw protobuf bytes in the report (they often contain NULs and make
- // output logs non-UTF8). Prefer a compact status/message string.
- if grpc_status == "0" {
- body = "grpc-status=0".to_string();
- } else if body.trim().is_empty()
- && (!grpc_status.is_empty() || !grpc_message.is_empty())
- {
- body = format!("grpc-status={grpc_status} grpc-message={grpc_message}");
- } else if body.as_bytes().contains(&0) {
- body = format!("grpc-status={grpc_status} grpc-message={grpc_message}");
- }
- if max_body_len > 0 {
- truncate_to_char_boundary(&mut body, max_body_len);
- }
-
- m.validation_response_status = status;
- m.validation_response_body = validation_body::from_string(body.clone());
-
- let matchers = match grpc_validation_cfg.request.response_matcher.as_ref() {
- Some(m) => m,
- None => {
- m.validation_success = false;
- m.validation_response_body = validation_body::from_string(format!(
- "gRPC validation for rule '{}' is missing `response_matcher`",
- rule_syntax.name
- ));
- m.validation_response_status = StatusCode::BAD_REQUEST;
- commit_and_return(m);
- return;
- }
- };
-
- m.validation_success =
- httpvalidation::validate_response(matchers, &body, &status, &headers, false);
+ .await;
}
-
- // ---------------------------------------------------- MongoDB validator
Some(Validation::MongoDB) => {
- let uri = globals
- .get("TOKEN")
- .and_then(|v| v.as_scalar())
- .map(|s| s.into_owned().to_kstr().to_string())
- .unwrap_or_default();
-
- if uri.is_empty() {
- m.validation_success = false;
- m.validation_response_body =
- validation_body::from_string("MongoDB URI not found.".to_string());
- m.validation_response_status = StatusCode::BAD_REQUEST;
- commit_and_return(m);
- return;
- }
-
- let cache_key = mongodb::generate_mongodb_cache_key(&uri);
- if let Some(cached) = cache.get(&cache_key) {
- let c = cached.value();
- if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) {
- m.validation_success = c.is_valid;
- m.validation_response_body = c.body.clone();
- m.validation_response_status = c.status;
- commit_and_return(m);
- return;
- }
- }
-
- match mongodb::validate_mongodb(&uri, use_lax_tls).await {
- Ok((ok, msg)) => {
- m.validation_success = ok;
- m.validation_response_body = validation_body::from_string(msg);
- m.validation_response_status =
- if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED };
- }
- Err(e) => {
- m.validation_success = false;
- m.validation_response_body =
- validation_body::from_string(format!("MongoDB validation error: {}", e));
- m.validation_response_status = StatusCode::BAD_GATEWAY;
- }
- }
+ validate_mongodb_rule(m, &globals, cache, use_lax_tls).await;
}
-
- // ---------------------------------------------------- MySQL validator
Some(Validation::MySQL) => {
- let mysql_url = globals
- .get("TOKEN")
- .and_then(|v| v.as_scalar())
- .map(|s| s.into_owned().to_kstr().to_string())
- .unwrap_or_default();
-
- if mysql_url.is_empty() {
- m.validation_success = false;
- m.validation_response_body =
- validation_body::from_string("MySQL URL not found.".to_string());
- m.validation_response_status = StatusCode::BAD_REQUEST;
- commit_and_return(m);
- return;
- }
-
- let cache_key = mysql::generate_mysql_cache_key(&mysql_url);
- if let Some(cached) = cache.get(&cache_key) {
- let c = cached.value();
- if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) {
- m.validation_success = c.is_valid;
- m.validation_response_body = c.body.clone();
- m.validation_response_status = c.status;
- commit_and_return(m);
- return;
- }
- }
-
- match mysql::validate_mysql(&mysql_url, use_lax_tls).await {
- Ok((ok, meta)) => {
- m.validation_success = ok;
- m.validation_response_body = validation_body::from_string(if ok {
- format!("MySQL connection is valid. Metadata: {:?}", meta)
- } else {
- "MySQL connection failed.".to_string()
- });
- m.validation_response_status =
- if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED };
- }
- Err(e) => {
- m.validation_success = false;
- m.validation_response_body =
- validation_body::from_string(format!("MySQL error: {}", e));
- m.validation_response_status = StatusCode::BAD_GATEWAY;
- }
- }
-
- cache.insert(
- cache_key,
- CachedResponse {
- body: m.validation_response_body.clone(),
- status: m.validation_response_status,
- is_valid: m.validation_success,
- timestamp: Instant::now(),
- },
- );
+ validate_mysql_rule(m, &globals, cache, use_lax_tls).await;
}
-
- // ------------------------------------------------ Azure Storage validator
Some(Validation::AzureStorage) => {
- let storage_key = captured_values
- .iter()
- .find(|(n, ..)| n == "TOKEN")
- .map(|(_, v, ..)| v.clone())
- .unwrap_or_default();
- let storage_account =
- utils::find_closest_variable(&captured_values, &storage_key, "TOKEN", "AZURENAME")
- .unwrap_or_default();
-
- if storage_account.is_empty() || storage_key.is_empty() {
- m.validation_success = false;
- m.validation_response_body = validation_body::from_string(
- "Missing Azure Storage account or key.".to_string(),
- );
- m.validation_response_status = StatusCode::BAD_REQUEST;
- commit_and_return(m);
- return;
- }
-
- let creds_json = format!(
- r#"{{"storage_account":"{}","storage_key":"{}"}}"#,
- storage_account, storage_key
- );
- let cache_key = azure::generate_azure_cache_key(&creds_json);
-
- if let Some(cached) = cache.get(&cache_key) {
- let c = cached.value();
- if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) {
- m.validation_success = c.is_valid;
- m.validation_response_body = c.body.clone();
- m.validation_response_status = c.status;
- commit_and_return(m);
- return;
- }
- }
-
- match azure::validate_azure_storage_credentials(&creds_json, cache).await {
- Ok((ok, msg)) => {
- m.validation_success = ok;
- m.validation_response_body = msg;
- m.validation_response_status =
- if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED };
- }
- Err(e) => {
- m.validation_success = false;
- m.validation_response_body =
- validation_body::from_string(format!("Azure Storage error: {}", e));
- m.validation_response_status = StatusCode::BAD_GATEWAY;
- }
- }
- cache.insert(
- cache_key,
- CachedResponse {
- body: m.validation_response_body.clone(),
- status: m.validation_response_status,
- is_valid: m.validation_success,
- timestamp: Instant::now(),
- },
- );
+ validate_azure_storage(m, &captured_values, cache).await;
}
-
- // ---------------------------------------------------- JDBC validator
Some(Validation::Jdbc) => {
- let jdbc_conn = captured_values
- .iter()
- .find(|(n, ..)| n == "TOKEN")
- .map(|(_, v, ..)| v.clone())
- .unwrap_or_default();
-
- if jdbc_conn.is_empty() {
- m.validation_success = false;
- m.validation_response_body =
- validation_body::from_string("JDBC connection string not found.".to_string());
- m.validation_response_status = StatusCode::BAD_REQUEST;
- commit_and_return(m);
- return;
- }
-
- let cache_key = jdbc::generate_jdbc_cache_key(&jdbc_conn);
- if let Some(cached) = cache.get(&cache_key) {
- let c = cached.value();
- if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) {
- m.validation_success = c.is_valid;
- m.validation_response_body = c.body.clone();
- m.validation_response_status = c.status;
- commit_and_return(m);
- return;
- }
- }
-
- match jdbc::validate_jdbc(&jdbc_conn, use_lax_tls).await {
- Ok(outcome) => {
- m.validation_success = outcome.valid;
- m.validation_response_body = validation_body::from_string(outcome.message);
- m.validation_response_status = outcome.status;
- }
- Err(e) => {
- m.validation_success = false;
- m.validation_response_body =
- validation_body::from_string(format!("JDBC validation error: {}", e));
- m.validation_response_status = StatusCode::BAD_GATEWAY;
- }
- }
-
- cache.insert(
- cache_key,
- CachedResponse {
- body: m.validation_response_body.clone(),
- status: m.validation_response_status,
- is_valid: m.validation_success,
- timestamp: Instant::now(),
- },
- );
+ validate_jdbc_rule(m, &captured_values, cache, use_lax_tls).await;
}
-
- // ------------------------------------------------ Postgres validator
Some(Validation::Postgres) => {
- let pg_url = globals
- .get("TOKEN")
- .and_then(|v| v.as_scalar())
- .map(|s| s.into_owned().to_kstr().to_string())
- .unwrap_or_default();
-
- if pg_url.is_empty() {
- m.validation_success = false;
- m.validation_response_body =
- validation_body::from_string("Postgres URL not found.".to_string());
- m.validation_response_status = StatusCode::BAD_REQUEST;
- commit_and_return(m);
- return;
- }
-
- let cache_key = postgres::generate_postgres_cache_key(&pg_url);
- if let Some(cached) = cache.get(&cache_key) {
- let c = cached.value();
- if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) {
- m.validation_success = c.is_valid;
- m.validation_response_body = c.body.clone();
- m.validation_response_status = c.status;
- commit_and_return(m);
- return;
- }
- }
-
- match postgres::validate_postgres(&pg_url, use_lax_tls).await {
- Ok((ok, meta)) => {
- m.validation_success = ok;
- m.validation_response_body = validation_body::from_string(if ok {
- format!("Postgres connection is valid. Metadata: {:?}", meta)
- } else {
- "Postgres connection failed.".to_string()
- });
- m.validation_response_status =
- if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED };
- }
- Err(e) => {
- m.validation_success = false;
- m.validation_response_body =
- validation_body::from_string(format!("Postgres error: {}", e));
- m.validation_response_status = StatusCode::BAD_GATEWAY;
- }
- }
- cache.insert(
- cache_key,
- CachedResponse {
- body: m.validation_response_body.clone(),
- status: m.validation_response_status,
- is_valid: m.validation_success,
- timestamp: Instant::now(),
- },
- );
+ validate_postgres_rule(m, &globals, cache, use_lax_tls).await;
}
- // ---------------------------------------------------- JWT validator
Some(Validation::JWT) => {
- let token = captured_values
- .iter()
- .find(|(n, ..)| n == "TOKEN")
- .map(|(_, v, ..)| v.clone())
- .unwrap_or_default();
-
- if token.is_empty() {
- m.validation_success = false;
- m.validation_response_body =
- validation_body::from_string("JWT token not found.".to_string());
- m.validation_response_status = StatusCode::BAD_REQUEST;
- commit_and_return(m);
- return;
- }
-
- match jwt::validate_jwt(&token, use_lax_tls, clients.allow_internal_ips).await {
- Ok((ok, msg)) => {
- m.validation_success = ok;
- m.validation_response_body = validation_body::from_string(msg);
- m.validation_response_status =
- if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED };
- }
- Err(e) => {
- m.validation_success = false;
- m.validation_response_body =
- validation_body::from_string(format!("JWT validation error: {}", e));
- m.validation_response_status = StatusCode::BAD_REQUEST;
- }
- }
+ validate_jwt_rule(m, &captured_values, use_lax_tls, clients.allow_internal_ips).await;
}
- // ---------------------------------------------------- AWS validator
Some(Validation::AWS) => {
- let secret = captured_values
- .iter()
- .find(|(n, ..)| n == "TOKEN")
- .map(|(_, v, ..)| v.clone())
- .unwrap_or_default();
- let akid = utils::find_closest_variable(&captured_values, &secret, "TOKEN", "AKID")
- .unwrap_or_default();
-
- if akid.is_empty() || secret.is_empty() {
- m.validation_success = false;
- m.validation_response_body = validation_body::from_string(
- "Missing AWS access-key ID or secret.".to_string(),
- );
- m.validation_response_status = StatusCode::BAD_REQUEST;
- commit_and_return(m);
- return;
- }
-
- let cache_key = aws::generate_aws_cache_key(&akid, &secret);
- if let Some(cached) = cache.get(&cache_key) {
- let c = cached.value();
- if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) {
- m.validation_success = c.is_valid;
- m.validation_response_body = c.body.clone();
- m.validation_response_status = c.status;
- commit_and_return(m);
- return;
- }
- }
-
- if let Some(account_id) = aws::should_skip_aws_validation(&akid) {
- m.validation_success = false;
- m.validation_response_body = validation_body::from_string(format!(
- "(skip list entry) AWS validation not attempted for account {}.",
- account_id
- ));
- m.validation_response_status = StatusCode::PRECONDITION_REQUIRED;
- cache.insert(
- cache_key,
- CachedResponse {
- body: m.validation_response_body.clone(),
- status: m.validation_response_status,
- is_valid: m.validation_success,
- timestamp: Instant::now(),
- },
- );
- commit_and_return(m);
- return;
- }
-
- if let Err(e) = aws::validate_aws_credentials_input(&akid, &secret) {
- m.validation_success = false;
- m.validation_response_body = validation_body::from_string(format!(
- "Invalid AWS credentials ({}): {}",
- akid, e
- ));
- m.validation_response_status = StatusCode::BAD_REQUEST;
- commit_and_return(m);
- return;
- }
-
- match aws::validate_aws_credentials(&akid, &secret).await {
- Ok((ok, msg)) => {
- m.validation_success = ok;
- if ok {
- let mut body = format!("{} --- ARN: {}", akid, msg);
- if let Ok(acct) = aws::aws_key_to_account_number(&akid) {
- body.push_str(&format!(" --- AWS Account Number: {:012}", acct));
- }
- m.validation_response_body = validation_body::from_string(body);
- m.validation_response_status = StatusCode::OK;
- } else {
- m.validation_response_body = validation_body::from_string(format!(
- "AWS validation error ({}): {}",
- akid, msg
- ));
- m.validation_response_status = StatusCode::UNAUTHORIZED;
- }
- cache.insert(
- cache_key,
- CachedResponse {
- body: m.validation_response_body.clone(),
- status: m.validation_response_status,
- is_valid: m.validation_success,
- timestamp: Instant::now(),
- },
- );
- }
- Err(e) => {
- m.validation_success = false;
- m.validation_response_body = validation_body::from_string(format!(
- "AWS validation error ({}): {}",
- akid, e
- ));
- m.validation_response_status = StatusCode::BAD_GATEWAY;
- }
- }
+ validate_aws_rule(m, &captured_values, cache).await;
}
-
- // ----------------------------------------------------- GCP validator
Some(Validation::GCP) => {
- let gcp_json = globals
- .get("TOKEN")
- .and_then(|v| v.as_scalar())
- .map(|s| s.into_owned().to_kstr().to_string())
- .unwrap_or_default();
-
- if gcp_json.is_empty() {
- m.validation_success = false;
- m.validation_response_body =
- validation_body::from_string("GCP JSON not found.".to_string());
- m.validation_response_status = StatusCode::BAD_REQUEST;
- commit_and_return(m);
- return;
- }
-
- let cache_key = gcp::generate_gcp_cache_key(&gcp_json);
- if let Some(cached) = cache.get(&cache_key) {
- let c = cached.value();
- if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) {
- m.validation_success = c.is_valid;
- m.validation_response_body = c.body.clone();
- m.validation_response_status = c.status;
- commit_and_return(m);
- return;
- }
- }
-
- match gcp::GcpValidator::global() {
- Ok(validator) => {
- match validator.validate_gcp_credentials(&gcp_json.as_bytes()).await {
- Ok((ok, meta)) => {
- m.validation_success = ok;
- m.validation_response_body =
- validation_body::from_string(meta.join("\n"));
- m.validation_response_status =
- if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED };
- }
- Err(e) => {
- m.validation_success = false;
- m.validation_response_body = validation_body::from_string(format!(
- "GCP validation error: {}",
- e
- ));
- m.validation_response_status = StatusCode::BAD_GATEWAY;
- }
- }
- }
- Err(e) => {
- m.validation_success = false;
- m.validation_response_body = validation_body::from_string(format!(
- "Failed to create GCP validator: {}",
- e
- ));
- m.validation_response_status = StatusCode::INTERNAL_SERVER_ERROR;
- }
- }
- cache.insert(
- cache_key,
- CachedResponse {
- body: m.validation_response_body.clone(),
- status: m.validation_response_status,
- is_valid: m.validation_success,
- timestamp: Instant::now(),
- },
- );
+ validate_gcp_rule(m, &globals, cache).await;
}
- // ----------------------------------------------------- Coinbase validator
Some(Validation::Coinbase) => {
- let cred_name = globals
- .get("CRED_NAME")
- .and_then(|v| v.as_scalar())
- .map(|s| s.into_owned().to_kstr().to_string())
- .unwrap_or_default();
- let private_key = globals
- .get("PRIVATE_KEY")
- .and_then(|v| v.as_scalar())
- .map(|s| s.into_owned().to_kstr().to_string())
- .unwrap_or_default();
-
- if cred_name.is_empty() || private_key.is_empty() {
- m.validation_success = false;
- m.validation_response_body =
- validation_body::from_string("Missing key name or private key.".to_string());
- m.validation_response_status = StatusCode::BAD_REQUEST;
- commit_and_return(m);
- return;
- }
-
- match coinbase::validate_cdp_api_key(&cred_name, &private_key, client, parser, cache)
- .await
- {
- Ok((ok, msg)) => {
- m.validation_success = ok;
- m.validation_response_body = msg;
- m.validation_response_status =
- if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED };
- }
- Err(e) => {
- m.validation_success = false;
- m.validation_response_body =
- validation_body::from_string(format!("Coinbase validation error: {}", e));
- m.validation_response_status = StatusCode::BAD_GATEWAY;
- }
- }
+ validate_coinbase_rule(m, &globals, client, parser, cache).await;
}
- // --------------------------------------------------------- Raw / none
Some(Validation::Raw(raw)) => {
- match kingfisher_scanner::validation::raw::validate_raw(
+ validate_raw_rule(
+ m,
raw,
&globals,
client,
- clients.should_use_lax(rule_syntax.tls_mode),
+ clients.should_use_lax(rule_tls_mode_for_raw),
clients.allow_internal_ips,
)
- .await
- {
- Ok(result) => {
- m.validation_success = result.valid;
- m.validation_response_body = validation_body::from_string(result.body);
- m.validation_response_status = result.status;
- }
- Err(e) => {
- debug!("Raw validation error for {}: {}", raw, e);
- m.validation_success = false;
- m.validation_response_body =
- validation_body::from_string(format!("Raw validation error: {}", e));
- m.validation_response_status = StatusCode::BAD_GATEWAY;
- }
- }
+ .await;
}
None => { /* no validation specified */ }
}
@@ -1530,6 +711,900 @@ async fn timed_validate_single_match<'a>(
commit_and_return(m);
}
+// βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+// Extracted validator functions
+// βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+async fn validate_http(
+ m: &mut OwnedBlobMatch,
+ http_validation: &kingfisher_rules::rule::HttpValidation,
+ client: &Client,
+ parser: &liquid::Parser,
+ globals: &Object,
+ cache: &Cache,
+ rule_name: &str,
+ allow_internal_ips: bool,
+ validation_timeout: Duration,
+ validation_retries: u32,
+ max_body_len: usize,
+) {
+ let request_timeout = validation_timeout;
+ let multipart_timeout = validation_timeout;
+ let max_retries: u32 = validation_retries;
+ let request_globals = httpvalidation::with_request_template_globals(globals);
+ let cache_globals = httpvalidation::with_cache_key_template_globals(globals);
+
+ let url = match render_and_parse_url(
+ parser,
+ &request_globals,
+ rule_name,
+ &http_validation.request.url,
+ allow_internal_ips,
+ )
+ .await
+ {
+ Ok(u) => u,
+ Err(e) => {
+ m.validation_success = false;
+ m.validation_response_body = validation_body::from_string(e);
+ m.validation_response_status = StatusCode::BAD_REQUEST;
+ return;
+ }
+ };
+
+ let request_builder = match httpvalidation::build_request_builder(
+ client,
+ &http_validation.request.method,
+ &url,
+ &http_validation.request.headers,
+ &http_validation.request.body,
+ request_timeout,
+ parser,
+ &request_globals,
+ ) {
+ Ok(rb) => rb,
+ Err(e) => {
+ m.validation_success = false;
+ m.validation_response_body = validation_body::from_string(e);
+ m.validation_response_status = StatusCode::BAD_REQUEST;
+ return;
+ }
+ };
+
+ let is_multipart = http_validation.request.multipart.is_some();
+ let mut cache_key = String::new();
+
+ if !is_multipart {
+ let cache_url =
+ render_template(parser, &cache_globals, rule_name, &http_validation.request.url)
+ .await
+ .unwrap_or_else(|_| http_validation.request.url.clone());
+
+ let rendered_headers = httpvalidation::process_headers(
+ &http_validation.request.headers,
+ parser,
+ &cache_globals,
+ &url,
+ )
+ .unwrap_or_default();
+
+ let mut header_map = BTreeMap::new();
+ for (name, value) in rendered_headers.iter() {
+ if let Ok(v) = value.to_str() {
+ header_map.insert(name.as_str().to_string(), v.to_string());
+ }
+ }
+
+ let rendered_body = http_validation.request.body.as_ref().and_then(|body_template| {
+ parser
+ .parse(body_template)
+ .ok()
+ .and_then(|template| template.render(&cache_globals).ok())
+ });
+
+ cache_key = httpvalidation::generate_http_cache_key_parts(
+ http_validation.request.method.as_str(),
+ &cache_url,
+ &header_map,
+ rendered_body.as_deref(),
+ );
+ if let Some(cached) = cache.get(&cache_key) {
+ let c = cached.value();
+ if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) {
+ m.validation_success = c.is_valid;
+ m.validation_response_body = c.body.clone();
+ m.validation_response_status = c.status;
+ return;
+ }
+ }
+ }
+
+ let exec_single = |builder: reqwest::RequestBuilder| async {
+ httpvalidation::retry_request(
+ builder,
+ max_retries,
+ Duration::from_millis(500),
+ Duration::from_secs(2),
+ )
+ .await
+ };
+
+ let resp_res = if is_multipart {
+ let build_request = || async {
+ let method = httpvalidation::parse_http_method(&http_validation.request.method)
+ .unwrap_or(reqwest::Method::GET);
+
+ let mut fresh_builder = client.request(method, url.clone()).timeout(multipart_timeout);
+
+ if let Ok(mut headers) = httpvalidation::process_headers(
+ &http_validation.request.headers,
+ parser,
+ &request_globals,
+ &url,
+ ) {
+ let std_headers = [
+ (header::USER_AGENT, GLOBAL_USER_AGENT.as_str()),
+ (header::ACCEPT , "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"),
+ (header::ACCEPT_LANGUAGE, "en-US,en;q=0.5"),
+ (header::ACCEPT_ENCODING, "gzip, deflate, br"),
+ (header::CONNECTION, "keep-alive"),
+ ];
+ for (hn, hv) in &std_headers {
+ if let Ok(v) = HeaderValue::from_str(hv) {
+ headers.insert(hn.clone(), v);
+ }
+ }
+ fresh_builder = fresh_builder.headers(headers);
+ }
+
+ let mut form = multipart::Form::new();
+ for part in http_validation.request.multipart.as_ref().unwrap().parts.iter() {
+ match part.part_type.as_str() {
+ "file" => {
+ let path =
+ render_template(parser, &request_globals, rule_name, &part.content)
+ .await
+ .unwrap_or_default();
+ let bytes = fs::read(path).unwrap_or_default();
+ let p = multipart::Part::bytes(bytes)
+ .mime_str(
+ part.content_type.as_deref().unwrap_or("application/octet-stream"),
+ )
+ .unwrap_or_else(|_| multipart::Part::text("invalid"));
+ form = form.part(part.name.clone(), p);
+ }
+ "text" => {
+ let txt =
+ render_template(parser, &request_globals, rule_name, &part.content)
+ .await
+ .unwrap_or_default();
+ let p = multipart::Part::text(txt)
+ .mime_str(part.content_type.as_deref().unwrap_or("text/plain"))
+ .unwrap_or_else(|_| multipart::Part::text("invalid"));
+ form = form.part(part.name.clone(), p);
+ }
+ _ => { /* ignore */ }
+ }
+ }
+ fresh_builder.multipart(form)
+ };
+
+ httpvalidation::retry_multipart_request(
+ build_request,
+ max_retries as usize,
+ Duration::from_millis(500),
+ Duration::from_secs(2),
+ )
+ .await
+ } else {
+ exec_single(request_builder).await
+ };
+
+ match resp_res {
+ Ok(resp) => {
+ let status = resp.status();
+ let headers = resp.headers().clone();
+ let body = match resp.text().await {
+ Ok(b) => b,
+ Err(e) => {
+ m.validation_success = false;
+ m.validation_response_body =
+ validation_body::from_string(format!("Error reading response: {}", e));
+ m.validation_response_status = StatusCode::BAD_GATEWAY;
+ return;
+ }
+ };
+ let display_body = if http_validation.request.response_is_html {
+ utils::format_response_body_for_display(&body, max_body_len, true)
+ } else {
+ truncate_preview(&body, max_body_len)
+ };
+
+ m.validation_response_status = status;
+ let body_opt = validation_body::from_string(display_body.clone());
+ m.validation_response_body = body_opt.clone();
+ let matchers = match http_validation.request.response_matcher.as_ref() {
+ Some(m) => m,
+ None => {
+ m.validation_success = false;
+ m.validation_response_body = validation_body::from_string(format!(
+ "HTTP validation for rule '{}' is missing `response_matcher`",
+ rule_name
+ ));
+ m.validation_response_status = StatusCode::BAD_REQUEST;
+ return;
+ }
+ };
+
+ m.validation_success = httpvalidation::validate_response(
+ matchers,
+ &body,
+ &status,
+ &headers,
+ http_validation.request.response_is_html,
+ );
+
+ let cacheable_status = !(status.is_server_error()
+ || status == StatusCode::TOO_MANY_REQUESTS
+ || status == StatusCode::REQUEST_TIMEOUT);
+ if !is_multipart && !cache_key.is_empty() && cacheable_status {
+ cache.insert(
+ cache_key,
+ CachedResponse {
+ body: body_opt,
+ status,
+ is_valid: m.validation_success,
+ timestamp: Instant::now(),
+ },
+ );
+ }
+ }
+ Err(e) => {
+ m.validation_success = false;
+ m.validation_response_body =
+ validation_body::from_string(format!("HTTP error: {:?}", e));
+ m.validation_response_status = StatusCode::BAD_GATEWAY;
+ }
+ }
+}
+
+async fn validate_grpc(
+ m: &mut OwnedBlobMatch,
+ grpc_validation_cfg: &kingfisher_rules::rule::GrpcValidation,
+ parser: &liquid::Parser,
+ globals: &Object,
+ rule_name: &str,
+ allow_internal_ips: bool,
+ validation_timeout: Duration,
+ max_body_len: usize,
+) {
+ let request_globals = httpvalidation::with_request_template_globals(globals);
+
+ let url = match render_and_parse_url(
+ parser,
+ &request_globals,
+ rule_name,
+ &grpc_validation_cfg.request.url,
+ allow_internal_ips,
+ )
+ .await
+ {
+ Ok(u) => u,
+ Err(e) => {
+ m.validation_success = false;
+ m.validation_response_body = validation_body::from_string(e);
+ m.validation_response_status = StatusCode::BAD_REQUEST;
+ return;
+ }
+ };
+
+ let res = match grpc_validation::grpc_unary_call_from_rule(
+ &url,
+ &grpc_validation_cfg.request.headers,
+ &grpc_validation_cfg.request.body,
+ parser,
+ &request_globals,
+ validation_timeout,
+ )
+ .await
+ {
+ Ok(r) => r,
+ Err(e) => {
+ m.validation_success = false;
+ m.validation_response_body = validation_body::from_string(format!("gRPC error: {}", e));
+ m.validation_response_status = StatusCode::BAD_GATEWAY;
+ return;
+ }
+ };
+
+ let status = StatusCode::from_u16(res.http_status.as_u16()).unwrap_or(StatusCode::OK);
+ let headers = res.headers;
+ let mut body = String::from_utf8_lossy(&res.body_bytes).to_string();
+
+ let grpc_status =
+ headers.get("grpc-status").and_then(|v| v.to_str().ok()).unwrap_or("").to_string();
+ let grpc_message =
+ headers.get("grpc-message").and_then(|v| v.to_str().ok()).unwrap_or("").to_string();
+ if grpc_status == "0" {
+ body = "grpc-status=0".to_string();
+ } else if body.trim().is_empty() && (!grpc_status.is_empty() || !grpc_message.is_empty()) {
+ body = format!("grpc-status={grpc_status} grpc-message={grpc_message}");
+ } else if body.as_bytes().contains(&0) {
+ body = format!("grpc-status={grpc_status} grpc-message={grpc_message}");
+ }
+ if max_body_len > 0 {
+ truncate_to_char_boundary(&mut body, max_body_len);
+ }
+
+ m.validation_response_status = status;
+ m.validation_response_body = validation_body::from_string(body.clone());
+
+ let matchers = match grpc_validation_cfg.request.response_matcher.as_ref() {
+ Some(m) => m,
+ None => {
+ m.validation_success = false;
+ m.validation_response_body = validation_body::from_string(format!(
+ "gRPC validation for rule '{}' is missing `response_matcher`",
+ rule_name
+ ));
+ m.validation_response_status = StatusCode::BAD_REQUEST;
+ return;
+ }
+ };
+
+ m.validation_success =
+ httpvalidation::validate_response(matchers, &body, &status, &headers, false);
+}
+
+async fn validate_mongodb_rule(
+ m: &mut OwnedBlobMatch,
+ globals: &Object,
+ cache: &Cache,
+ use_lax_tls: bool,
+) {
+ let uri = globals
+ .get("TOKEN")
+ .and_then(|v| v.as_scalar())
+ .map(|s| s.into_owned().to_kstr().to_string())
+ .unwrap_or_default();
+
+ if uri.is_empty() {
+ m.validation_success = false;
+ m.validation_response_body =
+ validation_body::from_string("MongoDB URI not found.".to_string());
+ m.validation_response_status = StatusCode::BAD_REQUEST;
+ return;
+ }
+
+ let cache_key = mongodb::generate_mongodb_cache_key(&uri);
+ if let Some(cached) = cache.get(&cache_key) {
+ let c = cached.value();
+ if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) {
+ m.validation_success = c.is_valid;
+ m.validation_response_body = c.body.clone();
+ m.validation_response_status = c.status;
+ return;
+ }
+ }
+
+ match mongodb::validate_mongodb(&uri, use_lax_tls).await {
+ Ok((ok, msg)) => {
+ m.validation_success = ok;
+ m.validation_response_body = validation_body::from_string(msg);
+ m.validation_response_status =
+ if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED };
+ }
+ Err(e) => {
+ m.validation_success = false;
+ m.validation_response_body =
+ validation_body::from_string(format!("MongoDB validation error: {}", e));
+ m.validation_response_status = StatusCode::BAD_GATEWAY;
+ }
+ }
+}
+
+async fn validate_mysql_rule(
+ m: &mut OwnedBlobMatch,
+ globals: &Object,
+ cache: &Cache,
+ use_lax_tls: bool,
+) {
+ let mysql_url = globals
+ .get("TOKEN")
+ .and_then(|v| v.as_scalar())
+ .map(|s| s.into_owned().to_kstr().to_string())
+ .unwrap_or_default();
+
+ if mysql_url.is_empty() {
+ m.validation_success = false;
+ m.validation_response_body =
+ validation_body::from_string("MySQL URL not found.".to_string());
+ m.validation_response_status = StatusCode::BAD_REQUEST;
+ return;
+ }
+
+ let cache_key = mysql::generate_mysql_cache_key(&mysql_url);
+ if let Some(cached) = cache.get(&cache_key) {
+ let c = cached.value();
+ if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) {
+ m.validation_success = c.is_valid;
+ m.validation_response_body = c.body.clone();
+ m.validation_response_status = c.status;
+ return;
+ }
+ }
+
+ match mysql::validate_mysql(&mysql_url, use_lax_tls).await {
+ Ok((ok, meta)) => {
+ m.validation_success = ok;
+ m.validation_response_body = validation_body::from_string(if ok {
+ format!("MySQL connection is valid. Metadata: {:?}", meta)
+ } else {
+ "MySQL connection failed.".to_string()
+ });
+ m.validation_response_status =
+ if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED };
+ }
+ Err(e) => {
+ m.validation_success = false;
+ m.validation_response_body =
+ validation_body::from_string(format!("MySQL error: {}", e));
+ m.validation_response_status = StatusCode::BAD_GATEWAY;
+ }
+ }
+
+ cache.insert(
+ cache_key,
+ CachedResponse {
+ body: m.validation_response_body.clone(),
+ status: m.validation_response_status,
+ is_valid: m.validation_success,
+ timestamp: Instant::now(),
+ },
+ );
+}
+
+async fn validate_azure_storage(
+ m: &mut OwnedBlobMatch,
+ captured_values: &[(String, String, usize, usize)],
+ cache: &Cache,
+) {
+ let storage_key = captured_values
+ .iter()
+ .find(|(n, ..)| n == "TOKEN")
+ .map(|(_, v, ..)| v.clone())
+ .unwrap_or_default();
+ let storage_account =
+ utils::find_closest_variable(captured_values, &storage_key, "TOKEN", "AZURENAME")
+ .unwrap_or_default();
+
+ if storage_account.is_empty() || storage_key.is_empty() {
+ m.validation_success = false;
+ m.validation_response_body =
+ validation_body::from_string("Missing Azure Storage account or key.".to_string());
+ m.validation_response_status = StatusCode::BAD_REQUEST;
+ return;
+ }
+
+ let creds_json =
+ format!(r#"{{"storage_account":"{}","storage_key":"{}"}}"#, storage_account, storage_key);
+ let cache_key = azure::generate_azure_cache_key(&creds_json);
+
+ if let Some(cached) = cache.get(&cache_key) {
+ let c = cached.value();
+ if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) {
+ m.validation_success = c.is_valid;
+ m.validation_response_body = c.body.clone();
+ m.validation_response_status = c.status;
+ return;
+ }
+ }
+
+ match azure::validate_azure_storage_credentials(&creds_json, cache).await {
+ Ok((ok, msg)) => {
+ m.validation_success = ok;
+ m.validation_response_body = msg;
+ m.validation_response_status =
+ if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED };
+ }
+ Err(e) => {
+ m.validation_success = false;
+ m.validation_response_body =
+ validation_body::from_string(format!("Azure Storage error: {}", e));
+ m.validation_response_status = StatusCode::BAD_GATEWAY;
+ }
+ }
+ cache.insert(
+ cache_key,
+ CachedResponse {
+ body: m.validation_response_body.clone(),
+ status: m.validation_response_status,
+ is_valid: m.validation_success,
+ timestamp: Instant::now(),
+ },
+ );
+}
+
+async fn validate_jdbc_rule(
+ m: &mut OwnedBlobMatch,
+ captured_values: &[(String, String, usize, usize)],
+ cache: &Cache,
+ use_lax_tls: bool,
+) {
+ let jdbc_conn = captured_values
+ .iter()
+ .find(|(n, ..)| n == "TOKEN")
+ .map(|(_, v, ..)| v.clone())
+ .unwrap_or_default();
+
+ if jdbc_conn.is_empty() {
+ m.validation_success = false;
+ m.validation_response_body =
+ validation_body::from_string("JDBC connection string not found.".to_string());
+ m.validation_response_status = StatusCode::BAD_REQUEST;
+ return;
+ }
+
+ let cache_key = jdbc::generate_jdbc_cache_key(&jdbc_conn);
+ if let Some(cached) = cache.get(&cache_key) {
+ let c = cached.value();
+ if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) {
+ m.validation_success = c.is_valid;
+ m.validation_response_body = c.body.clone();
+ m.validation_response_status = c.status;
+ return;
+ }
+ }
+
+ match jdbc::validate_jdbc(&jdbc_conn, use_lax_tls).await {
+ Ok(outcome) => {
+ m.validation_success = outcome.valid;
+ m.validation_response_body = validation_body::from_string(outcome.message);
+ m.validation_response_status = outcome.status;
+ }
+ Err(e) => {
+ m.validation_success = false;
+ m.validation_response_body =
+ validation_body::from_string(format!("JDBC validation error: {}", e));
+ m.validation_response_status = StatusCode::BAD_GATEWAY;
+ }
+ }
+
+ cache.insert(
+ cache_key,
+ CachedResponse {
+ body: m.validation_response_body.clone(),
+ status: m.validation_response_status,
+ is_valid: m.validation_success,
+ timestamp: Instant::now(),
+ },
+ );
+}
+
+async fn validate_postgres_rule(
+ m: &mut OwnedBlobMatch,
+ globals: &Object,
+ cache: &Cache,
+ use_lax_tls: bool,
+) {
+ let pg_url = globals
+ .get("TOKEN")
+ .and_then(|v| v.as_scalar())
+ .map(|s| s.into_owned().to_kstr().to_string())
+ .unwrap_or_default();
+
+ if pg_url.is_empty() {
+ m.validation_success = false;
+ m.validation_response_body =
+ validation_body::from_string("Postgres URL not found.".to_string());
+ m.validation_response_status = StatusCode::BAD_REQUEST;
+ return;
+ }
+
+ let cache_key = postgres::generate_postgres_cache_key(&pg_url);
+ if let Some(cached) = cache.get(&cache_key) {
+ let c = cached.value();
+ if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) {
+ m.validation_success = c.is_valid;
+ m.validation_response_body = c.body.clone();
+ m.validation_response_status = c.status;
+ return;
+ }
+ }
+
+ match postgres::validate_postgres(&pg_url, use_lax_tls).await {
+ Ok((ok, meta)) => {
+ m.validation_success = ok;
+ m.validation_response_body = validation_body::from_string(if ok {
+ format!("Postgres connection is valid. Metadata: {:?}", meta)
+ } else {
+ "Postgres connection failed.".to_string()
+ });
+ m.validation_response_status =
+ if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED };
+ }
+ Err(e) => {
+ m.validation_success = false;
+ m.validation_response_body =
+ validation_body::from_string(format!("Postgres error: {}", e));
+ m.validation_response_status = StatusCode::BAD_GATEWAY;
+ }
+ }
+ cache.insert(
+ cache_key,
+ CachedResponse {
+ body: m.validation_response_body.clone(),
+ status: m.validation_response_status,
+ is_valid: m.validation_success,
+ timestamp: Instant::now(),
+ },
+ );
+}
+
+async fn validate_jwt_rule(
+ m: &mut OwnedBlobMatch,
+ captured_values: &[(String, String, usize, usize)],
+ use_lax_tls: bool,
+ allow_internal_ips: bool,
+) {
+ let token = captured_values
+ .iter()
+ .find(|(n, ..)| n == "TOKEN")
+ .map(|(_, v, ..)| v.clone())
+ .unwrap_or_default();
+
+ if token.is_empty() {
+ m.validation_success = false;
+ m.validation_response_body =
+ validation_body::from_string("JWT token not found.".to_string());
+ m.validation_response_status = StatusCode::BAD_REQUEST;
+ return;
+ }
+
+ match jwt::validate_jwt(&token, use_lax_tls, allow_internal_ips).await {
+ Ok((ok, msg)) => {
+ m.validation_success = ok;
+ m.validation_response_body = validation_body::from_string(msg);
+ m.validation_response_status =
+ if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED };
+ }
+ Err(e) => {
+ m.validation_success = false;
+ m.validation_response_body =
+ validation_body::from_string(format!("JWT validation error: {}", e));
+ m.validation_response_status = StatusCode::BAD_REQUEST;
+ }
+ }
+}
+
+async fn validate_aws_rule(
+ m: &mut OwnedBlobMatch,
+ captured_values: &[(String, String, usize, usize)],
+ cache: &Cache,
+) {
+ let secret = captured_values
+ .iter()
+ .find(|(n, ..)| n == "TOKEN")
+ .map(|(_, v, ..)| v.clone())
+ .unwrap_or_default();
+ let akid =
+ utils::find_closest_variable(captured_values, &secret, "TOKEN", "AKID").unwrap_or_default();
+
+ if akid.is_empty() || secret.is_empty() {
+ m.validation_success = false;
+ m.validation_response_body =
+ validation_body::from_string("Missing AWS access-key ID or secret.".to_string());
+ m.validation_response_status = StatusCode::BAD_REQUEST;
+ return;
+ }
+
+ let cache_key = aws::generate_aws_cache_key(&akid, &secret);
+ if let Some(cached) = cache.get(&cache_key) {
+ let c = cached.value();
+ if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) {
+ m.validation_success = c.is_valid;
+ m.validation_response_body = c.body.clone();
+ m.validation_response_status = c.status;
+ return;
+ }
+ }
+
+ if let Some(account_id) = aws::should_skip_aws_validation(&akid) {
+ m.validation_success = false;
+ m.validation_response_body = validation_body::from_string(format!(
+ "(skip list entry) AWS validation not attempted for account {}.",
+ account_id
+ ));
+ m.validation_response_status = StatusCode::PRECONDITION_REQUIRED;
+ cache.insert(
+ cache_key,
+ CachedResponse {
+ body: m.validation_response_body.clone(),
+ status: m.validation_response_status,
+ is_valid: m.validation_success,
+ timestamp: Instant::now(),
+ },
+ );
+ return;
+ }
+
+ if let Err(e) = aws::validate_aws_credentials_input(&akid, &secret) {
+ m.validation_success = false;
+ m.validation_response_body =
+ validation_body::from_string(format!("Invalid AWS credentials ({}): {}", akid, e));
+ m.validation_response_status = StatusCode::BAD_REQUEST;
+ return;
+ }
+
+ match aws::validate_aws_credentials(&akid, &secret).await {
+ Ok((ok, msg)) => {
+ m.validation_success = ok;
+ if ok {
+ let mut body = format!("{} --- ARN: {}", akid, msg);
+ if let Ok(acct) = aws::aws_key_to_account_number(&akid) {
+ body.push_str(&format!(" --- AWS Account Number: {:012}", acct));
+ }
+ m.validation_response_body = validation_body::from_string(body);
+ m.validation_response_status = StatusCode::OK;
+ } else {
+ m.validation_response_body = validation_body::from_string(format!(
+ "AWS validation error ({}): {}",
+ akid, msg
+ ));
+ m.validation_response_status = StatusCode::UNAUTHORIZED;
+ }
+ cache.insert(
+ cache_key,
+ CachedResponse {
+ body: m.validation_response_body.clone(),
+ status: m.validation_response_status,
+ is_valid: m.validation_success,
+ timestamp: Instant::now(),
+ },
+ );
+ }
+ Err(e) => {
+ m.validation_success = false;
+ m.validation_response_body =
+ validation_body::from_string(format!("AWS validation error ({}): {}", akid, e));
+ m.validation_response_status = StatusCode::BAD_GATEWAY;
+ }
+ }
+}
+
+async fn validate_gcp_rule(m: &mut OwnedBlobMatch, globals: &Object, cache: &Cache) {
+ let gcp_json = globals
+ .get("TOKEN")
+ .and_then(|v| v.as_scalar())
+ .map(|s| s.into_owned().to_kstr().to_string())
+ .unwrap_or_default();
+
+ if gcp_json.is_empty() {
+ m.validation_success = false;
+ m.validation_response_body =
+ validation_body::from_string("GCP JSON not found.".to_string());
+ m.validation_response_status = StatusCode::BAD_REQUEST;
+ return;
+ }
+
+ let cache_key = gcp::generate_gcp_cache_key(&gcp_json);
+ if let Some(cached) = cache.get(&cache_key) {
+ let c = cached.value();
+ if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) {
+ m.validation_success = c.is_valid;
+ m.validation_response_body = c.body.clone();
+ m.validation_response_status = c.status;
+ return;
+ }
+ }
+
+ match gcp::GcpValidator::global() {
+ Ok(validator) => match validator.validate_gcp_credentials(&gcp_json.as_bytes()).await {
+ Ok((ok, meta)) => {
+ m.validation_success = ok;
+ m.validation_response_body = validation_body::from_string(meta.join("\n"));
+ m.validation_response_status =
+ if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED };
+ }
+ Err(e) => {
+ m.validation_success = false;
+ m.validation_response_body =
+ validation_body::from_string(format!("GCP validation error: {}", e));
+ m.validation_response_status = StatusCode::BAD_GATEWAY;
+ }
+ },
+ Err(e) => {
+ m.validation_success = false;
+ m.validation_response_body =
+ validation_body::from_string(format!("Failed to create GCP validator: {}", e));
+ m.validation_response_status = StatusCode::INTERNAL_SERVER_ERROR;
+ }
+ }
+ cache.insert(
+ cache_key,
+ CachedResponse {
+ body: m.validation_response_body.clone(),
+ status: m.validation_response_status,
+ is_valid: m.validation_success,
+ timestamp: Instant::now(),
+ },
+ );
+}
+
+async fn validate_coinbase_rule(
+ m: &mut OwnedBlobMatch,
+ globals: &Object,
+ client: &Client,
+ parser: &liquid::Parser,
+ cache: &Cache,
+) {
+ let cred_name = globals
+ .get("CRED_NAME")
+ .and_then(|v| v.as_scalar())
+ .map(|s| s.into_owned().to_kstr().to_string())
+ .unwrap_or_default();
+ let private_key = globals
+ .get("PRIVATE_KEY")
+ .and_then(|v| v.as_scalar())
+ .map(|s| s.into_owned().to_kstr().to_string())
+ .unwrap_or_default();
+
+ if cred_name.is_empty() || private_key.is_empty() {
+ m.validation_success = false;
+ m.validation_response_body =
+ validation_body::from_string("Missing key name or private key.".to_string());
+ m.validation_response_status = StatusCode::BAD_REQUEST;
+ return;
+ }
+
+ match coinbase::validate_cdp_api_key(&cred_name, &private_key, client, parser, cache).await {
+ Ok((ok, msg)) => {
+ m.validation_success = ok;
+ m.validation_response_body = msg;
+ m.validation_response_status =
+ if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED };
+ }
+ Err(e) => {
+ m.validation_success = false;
+ m.validation_response_body =
+ validation_body::from_string(format!("Coinbase validation error: {}", e));
+ m.validation_response_status = StatusCode::BAD_GATEWAY;
+ }
+ }
+}
+
+async fn validate_raw_rule(
+ m: &mut OwnedBlobMatch,
+ raw: &str,
+ globals: &Object,
+ client: &Client,
+ use_lax_tls: bool,
+ allow_internal_ips: bool,
+) {
+ match kingfisher_scanner::validation::raw::validate_raw(
+ raw,
+ globals,
+ client,
+ use_lax_tls,
+ allow_internal_ips,
+ )
+ .await
+ {
+ Ok(result) => {
+ m.validation_success = result.valid;
+ m.validation_response_body = validation_body::from_string(result.body);
+ m.validation_response_status = result.status;
+ }
+ Err(e) => {
+ debug!("Raw validation error for {}: {}", raw, e);
+ m.validation_success = false;
+ m.validation_response_body =
+ validation_body::from_string(format!("Raw validation error: {}", e));
+ m.validation_response_status = StatusCode::BAD_GATEWAY;
+ }
+ }
+}
+
fn populate_globals_from_captures(
globals: &mut Object,
captured_values: &[(String, String, usize, usize)],