performance improvements and rule improvements

This commit is contained in:
Mick Grove 2026-04-24 00:14:56 -07:00
commit a4e8117c8e
16 changed files with 451 additions and 122 deletions

View file

@ -6,7 +6,7 @@ rules:
\b
agora
(?:.|[\n\r]){0,32}?
\b(?:app[_-]?id|customer[_-]?id)\b
(?:\b|_)(?:app[_-]?id|customer[_-]?id)\b
(?:.|[\n\r]){0,16}?
[=:"'\s]
\b

View file

@ -19,8 +19,8 @@ rules:
min_entropy: 3.5
confidence: medium
examples:
- 'CONFIGCAT_SDK_KEY=PKDVCLf-Hq-h-kCzMp-L7Q/psuH7BGHoUmdONrzzUOY7A'
- 'configcat_key: "PKDVCLf-Hq-h-kCzMp-L7Q/psuH7BGHoUmdONrzzUOY7A"'
- 'CONFIGCAT_SDK_KEY=Aa1Bb2Cc3Dd4Ee5Ff6Gg7H/aA1bB2cC3dD4eE5fF6gG7h'
- 'configcat_key: "Aa1Bb2Cc3Dd4Ee5Ff6Gg7H/aA1bB2cC3dD4eE5fF6gG7h"'
references:
- https://configcat.com/docs/sdk-reference/overview/
validation:
@ -51,7 +51,7 @@ rules:
min_entropy: 3.5
confidence: medium
examples:
- 'CONFIGCAT_SDK_KEY=configcat-sdk-1/PKDVCLf-Hq-h-kCzMp-L7Q/psuH7BGHoUmdONrzzUOY7A'
- 'CONFIGCAT_SDK_KEY=configcat-sdk-1/Aa1Bb2Cc3Dd4Ee5Ff6Gg7H/aA1bB2cC3dD4eE5fF6gG7h'
references:
- https://configcat.com/docs/sdk-reference/overview/
validation:

View file

@ -0,0 +1,69 @@
# Confluence Data Center rules: a Personal Access Token detector
# (kingfisher.confluence.1) and a helper rule that finds the Confluence host
# (kingfisher.confluence.2) used to build the validation URL.
rules:
# Token rule. The pattern requires a "confluence" or "wiki" keyword
# (case-insensitive) within 16 characters before a 44-character base64 string.
# Shape of the captured token: [MNO] + 15 base64 chars + "O" + [g-v] + 26
# base64 chars. In base64, a leading [MNO] means the first decoded byte is an
# ASCII digit, and "O[g-v]" at offset 16 means the 13th decoded byte is ":".
# NOTE(review): presumably the decoded token is "<numeric id>:<random bytes>" —
# confirm against the Atlassian PAT format.
- name: Confluence Data Center Personal Access Token
id: kingfisher.confluence.1
pattern: |
(?x)
(?i:confluence|wiki)
(?:.|[\n\r]){0,16}?
\b
(
[MNO][A-Za-z0-9+/]{15}
O[g-v]
[A-Za-z0-9+/]{26}
)
pattern_requirements:
min_digits: 2
min_uppercase: 1
min_lowercase: 1
min_entropy: 4.0
confidence: medium
examples:
- 'confluence_pat: "MjQzMjkzMDQyNTI1OgTGWAoKFZTh/Is7cl+cdAI0Lbxo"'
- 'wiki_PAT=MDgxODgyOTYwNTA5OkFSuEyq1mtrLTVNGAPyka+/Vyfv'
references:
- https://confluence.atlassian.com/enterprise/using-personal-access-tokens-1026032365.html
- https://developer.atlassian.com/server/confluence/confluence-server-rest-api/
# Validation: GET /rest/api/user/current on the discovered host with the token
# as a Bearer credential. All matchers below must hold: HTTP 200, a valid JSON
# body, and the body containing "type":"known".
validation:
type: Http
content:
request:
headers:
Accept: application/json
Authorization: Bearer {{ TOKEN }}
method: GET
response_matcher:
- report_response: true
- status:
- 200
type: StatusMatch
- type: JsonValid
- type: WordMatch
words:
- '"type":"known"'
url: https://{{ CONFLUENCEDCDOMAIN }}/rest/api/user/current
# CONFLUENCEDCDOMAIN is supplied by rule kingfisher.confluence.2 (defined below).
depends_on_rule:
- rule_id: kingfisher.confluence.2
variable: CONFLUENCEDCDOMAIN
# Domain rule. Captures a hostname (optional port, scheme excluded from the
# capture) that has a DNS label starting with "wiki" or "confluence", preceded
# by up to 16 sub-domain labels and followed by at least one more dotted part.
# Declared with `visible: false`.
- name: Confluence Data Center Domain
id: kingfisher.confluence.2
pattern: |
(?xi)
(?:https?://)?
\b
(
(?:[a-z0-9-]+\.){0,16}
(?:wiki|confluence)[a-z0-9-]*
\.[a-z0-9.-]{2,64}
(?::\d{2,5})?
)
\b
min_entropy: 2.5
visible: false
confidence: medium
examples:
- wiki.corp.mongodb.com
- confluence.example.com
- https://wiki-staging.corp.internal:8443
references:
- https://confluence.atlassian.com/doc/confluence-server-documentation-135922.html

View file

@ -23,39 +23,14 @@ rules:
min_entropy: 3.0
confidence: medium
examples:
- "docusign.secret_key = 7a39ce6d-94cf-4bf6-9e9e-9213373c15f4"
- "docusign\nds_secret = 3d2f18c9-2075-4e78-834b-64f57f8757d0"
validation:
type: Http
content:
request:
method: POST
url: "https://{{ DOCUSIGN_AUTH_HOST }}/oauth/token"
headers:
Accept: application/json
Content-Type: application/x-www-form-urlencoded
body: >
grant_type=authorization_code&code=INVALID_AUTH_CODE&client_id={{ DOCUSIGN_CLIENT_ID | url_encode }}&client_secret={{ TOKEN | url_encode }}&redirect_uri={{ REDIRECT_URI | url_encode }}
response_matcher:
- report_response: true
- type: StatusMatch
status: [400]
- type: WordMatch
match_all_words: false
words:
- invalid_grant
- invalid authorization code
- type: WordMatch
words:
- invalid_client
negative: true
depends_on_rule:
- rule_id: kingfisher.docusign.2
variable: DOCUSIGN_CLIENT_ID
- rule_id: kingfisher.docusign.3
variable: DOCUSIGN_AUTH_HOST
- rule_id: kingfisher.docusign.4
variable: REDIRECT_URI
- "docusign.secret_key = 12345678-abcd-9876-5432-abcdef123456"
- "docusign\nds_secret = 87654321-fedc-1234-abcd-fedcba987654"
# Validation intentionally omitted: DocuSign's /oauth/token endpoint
# returns {"error":"invalid_grant"} for any request with an invalid
# authorization code, regardless of whether client_id/client_secret are
# actually valid. That makes it impossible to distinguish live from
# inactive credentials via that endpoint without performing a full OAuth
# flow, which is out of scope for passive validation.
references:
- https://developers.docusign.com/platform/auth/
- https://developers.docusign.com/platform/build-integration/

View file

@ -22,7 +22,7 @@ rules:
min_entropy: 3.3
confidence: medium
examples:
- 'const CLIENTSECRET = "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"'
- 'const CLIENTSECRET = "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"'
depends_on_rule:
- rule_id: "kingfisher.google.1"
variable: GOOGLE_CLIENT_ID
@ -41,9 +41,20 @@ rules:
- report_response: true
- type: StatusMatch
status: [400]
- type: WordMatch
match_all_words: false
words:
- invalid_grant
- Malformed auth code
- Bad Request
# Only mark as active when Google acknowledges the credentials
# and rejects the (intentionally invalid) authorization code.
- type: WordMatch
words:
- invalid_client
- unauthorized_client
- unsupported_grant_type
- invalid_request
negative: true
# Revocation not added: Google's OAuth revocation endpoint revokes tokens,
# not client secrets.
@ -80,9 +91,20 @@ rules:
- report_response: true
- type: StatusMatch
status: [400]
- type: WordMatch
match_all_words: false
words:
- invalid_grant
- Malformed auth code
- Bad Request
# Only mark as active when Google acknowledges the credentials
# and rejects the (intentionally invalid) authorization code.
- type: WordMatch
words:
- invalid_client
- unauthorized_client
- unsupported_grant_type
- invalid_request
negative: true
# Revocation not added: Google's OAuth revocation endpoint revokes tokens,
# not client secrets.

View file

@ -8,9 +8,9 @@ rules:
(?:.|[\n\r]){0,24}?
\b
(
sk_live_a2V5Xz[A-Za-z0-9+/]{69}
sk_live_a2V5Xz[A-Za-z0-9+/]{69}={0,2}
)
(?![A-Za-z0-9+/])
(?:[^A-Za-z0-9+/=]|$)
pattern_requirements:
min_digits: 2
min_entropy: 3.5

View file

@ -6,7 +6,7 @@ rules:
\b
huawei
(?:.|[\n\r]){0,32}?
\b(?:client[_-]?id|app[_-]?id)\b
(?:\b|_)(?:client[_-]?id|app[_-]?id)\b
(?:.|[\n\r]){0,16}?
[=:"'\s]
\b

View file

@ -58,4 +58,98 @@ rules:
- https://developer.atlassian.com/cloud/jira/platform/basic-auth-for-rest-apis/
depends_on_rule:
- rule_id: kingfisher.jira.1
variable: DOMAIN
variable: DOMAIN
# Jira Data Center Personal Access Token rule. The pattern requires a "jira" or
# "atlassian" keyword (case-insensitive) within 16 characters before a
# 44-character base64 string shaped as: [MNO] + 15 base64 chars + "O" + [g-v] +
# 26 base64 chars. In base64, a leading [MNO] means the first decoded byte is an
# ASCII digit, and "O[g-v]" at offset 16 means the 13th decoded byte is ":".
# NOTE(review): presumably the decoded token is "<numeric id>:<random bytes>" —
# confirm against the Atlassian PAT format.
- name: Jira Data Center Personal Access Token
id: kingfisher.jira.3
pattern: |
(?x)
(?i:jira|atlassian)
(?:.|[\n\r]){0,16}?
\b
(
[MNO][A-Za-z0-9+/]{15}
O[g-v]
[A-Za-z0-9+/]{26}
)
pattern_requirements:
min_digits: 2
min_uppercase: 1
min_lowercase: 1
min_entropy: 4.0
confidence: medium
examples:
- 'jira_token: "Mjc2NTIyMTkxNTY2OkurZAe0a40+xLE2fJRBcq/P2vsL"'
- 'atlassian_PAT=OTI0NTIyOQkzMTk3OgyypbjdwdDzTavLf2R1Ls0XJAPm'
references:
- https://confluence.atlassian.com/enterprise/using-personal-access-tokens-1026032365.html
- https://developer.atlassian.com/server/jira/platform/personal-access-token/
# Validation: GET /rest/api/latest/myself on the discovered host with the token
# as a Bearer credential; requires HTTP 200 and a valid JSON body.
validation:
type: Http
content:
request:
headers:
Accept: application/json
Authorization: Bearer {{ TOKEN }}
method: GET
response_matcher:
- report_response: true
- status:
- 200
type: StatusMatch
- type: JsonValid
url: https://{{ JIRADCDOMAIN }}/rest/api/latest/myself
# Revocation is two requests: list tokens to obtain an id, then DELETE that id.
revocation:
type: HttpMultiStep
content:
steps:
- name: lookup_token_id
request:
method: GET
url: https://{{ JIRADCDOMAIN }}/rest/pat/latest/tokens
headers:
Accept: application/json
Authorization: Bearer {{ TOKEN }}
response_matcher:
- type: StatusMatch
status: [200]
- type: JsonValid
# NOTE(review): "$[0].id" takes the id of the FIRST element of the returned
# array. If this endpoint lists more than one token for the user, that element
# may not be the token in {{ TOKEN }}, and the DELETE below would revoke a
# different token — confirm the endpoint's response shape before relying on it.
extract:
JIRA_TOKEN_ID:
type: JsonPath
path: "$[0].id"
- name: revoke_token
request:
method: DELETE
url: https://{{ JIRADCDOMAIN }}/rest/pat/latest/tokens/{{ JIRA_TOKEN_ID }}
headers:
Authorization: Bearer {{ TOKEN }}
response_matcher:
- report_response: true
- type: StatusMatch
status: [204]
# JIRADCDOMAIN is supplied by rule kingfisher.jira.4.
depends_on_rule:
- rule_id: kingfisher.jira.4
variable: JIRADCDOMAIN
# Jira Data Center domain rule. Captures a hostname (optional port, scheme
# excluded from the capture) that has a DNS label starting with "jira",
# preceded by up to 16 sub-domain labels and followed by at least one more
# dotted part. Matching is case-insensitive. Declared with `visible: false`.
- name: Jira Data Center Domain
id: kingfisher.jira.4
pattern: |
(?xi)
(?:https?://)?
\b
(
(?:[a-z0-9-]+\.){0,16}
jira[a-z0-9-]*
\.[a-z0-9.-]{2,64}
(?::\d{2,5})?
)
\b
min_entropy: 2.5
visible: false
confidence: medium
examples:
- jira.example.com
- jira-staging.corp.mongodb.com
- https://jira.corp.internal:8443
references:
- https://confluence.atlassian.com/adminjiraserver/jira-applications-base-url-938846869.html

View file

@ -60,7 +60,7 @@ rules:
min_entropy: 3.5
confidence: medium
examples:
- "kingfisher-svc-1.b500ae.mp-service-account\nSecret: Vqprs8MMJm3XSpfWiuAFECFuyKxxrJL1" # nosemgrep
- "example-svc-1.abcdef.mp-service-account\nSecret: FakeExampleServiceAcctSecretAb12" # nosemgrep
negative_examples:
- mp-service-account
- "mp-service-account\nsecret: a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6" # nosemgrep

View file

@ -28,7 +28,12 @@ rules:
body: '{"event":{"message":"test"}}'
response_matcher:
- report_response: true
- type: StatusMatch
status: [200, 401, 403]
- type: JsonValid
- type: WordMatch
words:
- '"status":"Unauthorized"'
- '"status":"NotAuthorized"'
- '"status":"Forbidden"'
negative: true
references:
- https://pangea.cloud/docs/

View file

@ -18,8 +18,8 @@ rules:
pattern_requirements:
min_digits: 2
examples:
- STORYBLOK_ACCESS_TOKEN=wANpEQEsMYGOwLxwXQ76Ggtt
- storyblok_token = "13Kft3335iwbBOI333wawOtt"
- STORYBLOK_ACCESS_TOKEN=ExampleBogusTokenXYZ12tt
- storyblok_token = "FakeExampleTok45AB67CDtt"
references:
- https://www.storyblok.com/docs/api/content-delivery/v2/getting-started/authentication
- https://www.storyblok.com/docs/concepts/access-tokens

View file

@ -6,7 +6,7 @@ rules:
\b
webex
(?:.|[\n\r]){0,32}?
\b(?:client[_-]?id|client)\b
(?:\b|_)(?:client[_-]?id|client)\b
(?:.|[\n\r]){0,16}?
[=:"'\s]
\b
@ -20,7 +20,7 @@ rules:
confidence: medium
visible: false
examples:
- "webex_client = Ac0769801df88a3535b4b018ef570b499002bda401b3b8789259a937f22d66095"
- "webex_client = c0769801df88a3535b4b018ef570b499002bda401b3b8789259a937f22d66095"
- "WEBEX_CLIENT_ID=1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b1c2d3e4f5a6b7c8d9e0f1a2b"
references:
- https://developer.webex.com/docs/platform-introduction

View file

@ -8,9 +8,9 @@ rules:
(?:.|[\n\r]){0,24}?
\b
(
sk_live_a2V5Xz[A-Za-z0-9+/]{69}
sk_live_a2V5Xz[A-Za-z0-9+/]{69}={0,2}
)
(?![A-Za-z0-9+/])
(?:[^A-Za-z0-9+/=]|$)
pattern_requirements:
min_digits: 4
min_lowercase: 4

View file

@ -3011,7 +3011,56 @@
return payload ? [payload] : [];
}
function buildGitMetadata(commitId, email, fileUrl) {
/**
 * Convert a git remote into a browsable https/http base URL.
 *
 * Accepted inputs:
 *   - http(s) URLs (embedded "user:password@" credentials are removed so they
 *     never end up in report links),
 *   - scp-style remotes ("git@host:org/repo.git"),
 *   - "ssh://git@host[:port]/org/repo.git" (the SSH port is dropped because it
 *     is not the web port).
 * A trailing ".git" and trailing slashes are removed in every case.
 *
 * @param {*} repoUrl Remote URL; any value is coerced to a string.
 * @returns {string} Normalized web URL, or "" for empty, file:// or
 *   unrecognised input.
 */
function normalizeRepositoryWebUrl(repoUrl) {
  const raw = String(repoUrl || "").trim();
  if (!raw || /^file:\/\//i.test(raw)) return "";

  // Trailing slashes are removed first so "repo.git/" is still recognised as
  // carrying a ".git" suffix; the second pass cleans up anything left behind.
  const stripSuffix = (value) =>
    value.replace(/\/+$/, "").replace(/\.git$/i, "").replace(/\/+$/, "");

  if (/^https?:\/\//i.test(raw)) {
    // Drop userinfo from the authority part only (cannot cross "/", "?" or "#").
    return stripSuffix(raw.replace(/^(https?:\/\/)[^\/?#]*@/i, "$1"));
  }

  const scpMatch = raw.match(/^git@([^:]+):(.+)$/i);
  if (scpMatch) {
    return stripSuffix(`https://${scpMatch[1]}/${scpMatch[2].replace(/^\/+/, "")}`);
  }

  const sshUrlMatch = raw.match(/^ssh:\/\/git@([^/]+)\/(.+)$/i);
  if (sshUrlMatch) {
    const host = sshUrlMatch[1].replace(/:\d+$/, "");
    return stripSuffix(`https://${host}/${sshUrlMatch[2].replace(/^\/+/, "")}`);
  }

  return "";
}
/**
 * Build a browsable link to a file (and optionally a line) at a given commit.
 *
 * The repository URL is normalized with normalizeRepositoryWebUrl, backslashes
 * in the path become "/", and leading slashes are removed. The link layout
 * depends on the host:
 *   - dev.azure.com / *.visualstudio.com: /commit/<sha>?path=/<path>[&line=N]
 *   - bitbucket.org:                      /commits/<sha>[#L<path>F<N>]
 *   - anything else:                      /blob/<sha>/<path>[#L<N>]
 *
 * @param {*} repoUrl  Remote URL of the repository.
 * @param {*} commitId Commit identifier; coerced to a trimmed string.
 * @param {*} filePath Path of the file inside the repository.
 * @param {*} line     Line value, passed through toNumberOrNull; a falsy
 *   result omits the line anchor.
 * @returns {string} The link, or "" when the repository URL, commit or path is
 *   missing, or when URL parsing/encoding throws.
 */
function buildGitFileUrl(repoUrl, commitId, filePath, line) {
  const base = normalizeRepositoryWebUrl(repoUrl);
  const commit = String(commitId || "").trim();
  const normalizedPath = String(filePath || "").replace(/\\/g, "/").replace(/^\/+/, "");
  const lineNumber = toNumberOrNull(line);
  if (!base || !commit || !normalizedPath) return "";
  try {
    const parsed = new URL(base);
    const host = (parsed.hostname || "").toLowerCase();
    if (host === "dev.azure.com" || host.endsWith(".visualstudio.com")) {
      const encodedPath = encodeURIComponent(normalizedPath);
      return lineNumber
        ? `${base}/commit/${commit}?path=/${encodedPath}&line=${lineNumber}`
        : `${base}/commit/${commit}?path=/${encodedPath}`;
    }
    // Percent-encode every path segment while keeping "/" as the separator, so
    // file names containing "#", "?", "%" or spaces do not corrupt the link.
    const encodedSegments = encodeURIComponent(normalizedPath).replace(/%2F/g, "/");
    if (host === "bitbucket.org") {
      return lineNumber
        ? `${base}/commits/${commit}#L${encodedSegments}F${lineNumber}`
        : `${base}/commits/${commit}`;
    }
    return lineNumber
      ? `${base}/blob/${commit}/${encodedSegments}#L${lineNumber}`
      : `${base}/blob/${commit}/${encodedSegments}`;
  } catch (_) {
    // new URL() or encodeURIComponent() rejected the input: no link.
    return "";
  }
}
function buildGitMetadata(commitId, email, fileUrl, repositoryUrl) {
const gitMetadata = {};
if (commitId || email) {
gitMetadata.commit = {};
@ -3021,6 +3070,9 @@
if (fileUrl) {
gitMetadata.file = { url: String(fileUrl) };
}
if (repositoryUrl) {
gitMetadata.repository = { url: String(repositoryUrl) };
}
return Object.keys(gitMetadata).length ? gitMetadata : null;
}
@ -3059,10 +3111,16 @@
? buildSyntheticFingerprint("gitleaks", [ruleId, secretIdentity])
: buildSyntheticFingerprint("gitleaks", [ruleId, path, line, columnStart, snippet]))
);
const repoUrl = firstNonEmpty(item.RepoURL, item.repoURL, item.Repo, item.repo);
const directUrl = firstNonEmpty(item.LeakURL, item.leakURL, item.Url, item.url);
const fileUrl = isHttpUrl(directUrl)
? directUrl
: buildGitFileUrl(repoUrl, firstNonEmpty(item.Commit, item.commit), path, line);
const gitMetadata = buildGitMetadata(
firstNonEmpty(item.Commit, item.commit),
firstNonEmpty(item.Email, item.email),
firstNonEmpty(item.LeakURL, item.leakURL, item.Url, item.url)
fileUrl,
normalizeRepositoryWebUrl(repoUrl)
);
return {
@ -3099,6 +3157,7 @@
const detectorName = String(firstNonEmpty(item.DetectorName, item.detectorName, item.DetectorDescription, item.detectorDescription) || "Unknown Detector");
const detectorDescription = String(firstNonEmpty(item.DetectorDescription, item.detectorDescription, detectorName) || detectorName);
const sourceMetadata = item.SourceMetadata || item.sourceMetadata || {};
const gitSource = findDeepValue(sourceMetadata, ["git"]) || {};
const path = String(firstNonEmpty(
findDeepValue(sourceMetadata, ["file", "path", "filename"]),
item.File,
@ -3114,6 +3173,12 @@
item.Url,
item.url
);
const repositoryUrl = firstNonEmpty(
findDeepValue(gitSource, ["repository", "repositoryurl", "repo", "repo_url", "remote", "remoteurl", "remote_url"]),
findDeepValue(sourceMetadata, ["repository", "repositoryurl", "repo", "repo_url", "remote", "remoteurl", "remote_url"]),
item.Repository,
item.repository
);
const commitId = firstNonEmpty(
findDeepValue(sourceMetadata, ["commit", "commitid", "hash", "sha"]),
item.Commit,
@ -3134,7 +3199,9 @@
);
const verified = Boolean(item.Verified);
const verificationError = String(firstNonEmpty(item.VerificationError, item.verificationError) || "");
const gitMetadata = buildGitMetadata(commitId, email, isHttpUrl(sourceUrl) ? sourceUrl : "");
const normalizedRepoUrl = normalizeRepositoryWebUrl(repositoryUrl);
const fileUrl = isHttpUrl(sourceUrl) ? sourceUrl : buildGitFileUrl(normalizedRepoUrl, commitId, path, line);
const gitMetadata = buildGitMetadata(commitId, email, fileUrl, normalizedRepoUrl);
return {
rule: {
@ -3639,6 +3706,19 @@
revoke_command: kf.revoke_command || "",
confidence: kf.confidence || "",
};
if (kf.git_metadata && (!fg.git_metadata || !fg.git_metadata.file || !fg.git_metadata.file.url)) {
fg.git_metadata = Object.assign({}, fg.git_metadata || {});
if (kf.git_metadata.file && kf.git_metadata.file.url) {
fg.git_metadata.file = Object.assign({}, fg.git_metadata.file || {}, {
url: kf.git_metadata.file.url,
});
}
if (kf.git_metadata.repository && kf.git_metadata.repository.url) {
fg.git_metadata.repository = Object.assign({}, fg.git_metadata.repository || {}, {
url: kf.git_metadata.repository.url,
});
}
}
}
return findingList;
}
@ -4547,6 +4627,7 @@
const headers = [
"rule_id",
"rule_name",
"source_tool",
"file_path",
"line",
"validation_status",
@ -4563,6 +4644,7 @@
return [
rule.id || "",
rule.name || "",
getFindingSourceDisplayName(finding),
finding.path || "",
finding.line != null ? finding.line : "",
status,
@ -4736,10 +4818,12 @@
const normalizedStatus = normalizeValidationStatus(statusRaw);
const findingId = getFindingIdFromFinding(finding);
const gitUrl = getFileUrlFromFinding(finding);
const sourceTool = getFindingSourceDisplayName(finding);
const statusColor = normalizedStatus === "active" ? "#dc2626" : normalizedStatus === "inactive" ? "#f97316" : "#6b7280";
return `
<tr>
<td>${escapeHtml(rule.name || rule.id || "")}</td>
<td>${escapeHtml(sourceTool)}</td>
<td style="font-size:10px;font-family:monospace;">${escapeHtml(findingId ? findingId.substring(0, 12) : "")}</td>
<td style="font-size:10px;">${escapeHtml(finding.path || "")}</td>
<td style="color:${statusColor};font-weight:600;">${escapeHtml(statusRaw)}</td>
@ -4750,7 +4834,7 @@
`;
})
.join("")
: '<tr><td colspan="7">No findings available.</td></tr>';
: '<tr><td colspan="8">No findings available.</td></tr>';
// Scan metadata section
const metaLines = [];
@ -4893,12 +4977,13 @@
<thead>
<tr>
<th style="width:15%;">Rule</th>
<th style="width:10%;">Tool</th>
<th style="width:10%;">Fingerprint</th>
<th style="width:22%;">File Path</th>
<th style="width:18%;">File Path</th>
<th style="width:12%;">Status</th>
<th style="width:9%;">Confidence</th>
<th style="width:5%;">Line</th>
<th style="width:27%;">Git URL</th>
<th style="width:21%;">Git URL</th>
</tr>
</thead>
<tbody>
@ -5378,7 +5463,16 @@
const fileObj = finding.git_metadata.file || {};
const url = fileObj.url || "";
if (url && /^https?:\/\//i.test(url)) return url;
return "";
const repoObj = finding.git_metadata.repository || {};
const repoUrl = repoObj.url || "";
const commitId = finding.git_metadata.commit && finding.git_metadata.commit.id
? finding.git_metadata.commit.id
: "";
return buildGitFileUrl(repoUrl, commitId, finding.path || "", finding.line);
}
/**
 * Resolve the display name of the tool that produced a finding.
 *
 * @param {object} [finding] Finding record; a falsy value is replaced by {}.
 * @returns {*} Whatever getSourceDisplayName returns for the finding's tool.
 */
function getFindingSourceDisplayName(finding) {
  const record = finding || {};
  const toolId = getFindingSourceTool(record);
  return getSourceDisplayName(toolId);
}
function wireKfEnrichmentCopyButton(wrapId, codeId, btnId, cmd) {
@ -5724,6 +5818,7 @@
const { level, text: rationaleText } = generateRiskRationale(f, entries);
const statusRaw = finding.validation && finding.validation.status ? String(finding.validation.status) : "Unknown";
const gitUrl = getFileUrlFromFinding(finding);
const sourceTool = getFindingSourceDisplayName(finding);
const levelColors = {
critical: { bg: "#fef2f2", border: "#fca5a5", text: "#991b1b" },
@ -5815,6 +5910,7 @@
<div class="detail-grid">
<div class="detail-item"><label>Rule</label><div>${escapeHtml(rule.name || "")} (${escapeHtml(rule.id || "")})</div></div>
<div class="detail-item"><label>Validation</label><div>${escapeHtml(statusRaw)}</div></div>
<div class="detail-item"><label>Tool</label><div>${escapeHtml(sourceTool)}</div></div>
<div class="detail-item"><label>Confidence</label><div>${escapeHtml(finding.confidence || "")}</div></div>
<div class="detail-item"><label>Entropy</label><div>${finding.entropy != null ? escapeHtml(String(finding.entropy)) : "—"}</div></div>
<div class="detail-item" style="grid-column:span 2;"><label>File Path</label><div>${escapeHtml(finding.path || "")}</div></div>
@ -5910,6 +6006,7 @@
const colors = levelColors[level] || levelColors.none;
const statusRaw = finding.validation && finding.validation.status ? String(finding.validation.status) : "Unknown";
const gitUrl = getFileUrlFromFinding(finding);
const sourceTool = getFindingSourceDisplayName(finding);
let accessSummary = "";
if (entries.length > 0) {
@ -5946,6 +6043,7 @@
<div style="display:grid;grid-template-columns:1fr 1fr;gap:8px;font-size:12px;margin-bottom:10px;">
<div><strong>Path:</strong> ${escapeHtml(finding.path || "—")}</div>
<div><strong>Line:</strong> ${finding.line != null ? finding.line : "—"}</div>
<div><strong>Tool:</strong> ${escapeHtml(sourceTool)}</div>
<div><strong>Validation:</strong> ${escapeHtml(statusRaw)}</div>
<div><strong>Confidence:</strong> ${escapeHtml(finding.confidence || "—")}</div>
${gitUrl ? `<div style="grid-column:span 2;"><strong>URL:</strong> <a href="${escapeHtml(gitUrl)}" style="color:#1d4ed8;">${escapeHtml(gitUrl)}</a></div>` : ""}
@ -6542,4 +6640,4 @@
}
</script>
</body>
</html>
</html>

View file

@ -8,8 +8,11 @@ use std::{
use anyhow::{Context, Result};
use chrono::Local;
use serde::{Deserialize, Serialize};
use smallvec::{SmallVec, smallvec};
use tracing::debug;
type FingerprintForms = SmallVec<[u64; 2]>;
use crate::findings_store::FindingsStore;
#[derive(Debug, Default, Serialize, Deserialize)]
@ -41,9 +44,10 @@ pub fn load_baseline(path: &Path) -> Result<BaselineFile> {
///
/// Accepts either the decimal form users see in scan output (JSON/pretty/SARIF)
/// or the 16-char zero-padded hex form previously written by `--manage-baseline`.
/// Returns a `SmallVec`-style pair so ambiguous 16-digit all-digit strings — which
/// could be either a decimal fingerprint or a legacy hex fingerprint whose value
/// happens to contain no `a-f` — match against both interpretations.
/// Returns 0–2 canonical u64 interpretations: ambiguous 16-digit all-digit
/// strings — which could be either a decimal fingerprint or a legacy hex
/// fingerprint whose value happens to contain no `a-f` — yield both so either
/// form matches.
///
/// Detection:
/// 1. A `0x`/`0X` prefix is stripped and the rest parsed as hex.
@ -52,16 +56,22 @@ pub fn load_baseline(path: &Path) -> Result<BaselineFile> {
/// 3. Exactly 16 digits: ambiguous — try both decimal and hex and return whichever
/// interpretations parse successfully, so old baselines keep matching.
/// 4. Otherwise the string is parsed as decimal u64.
fn parse_fingerprint(s: &str) -> Vec<u64> {
fn parse_fingerprint(s: &str) -> FingerprintForms {
let trimmed = s.trim();
if let Some(rest) = trimmed.strip_prefix("0x").or_else(|| trimmed.strip_prefix("0X")) {
return u64::from_str_radix(rest, 16).ok().into_iter().collect();
return match u64::from_str_radix(rest, 16) {
Ok(v) => smallvec![v],
Err(_) => SmallVec::new(),
};
}
if trimmed.len() == 16 && trimmed.chars().all(|c| c.is_ascii_hexdigit()) {
if trimmed.chars().any(|c| c.is_ascii_alphabetic()) {
return u64::from_str_radix(trimmed, 16).ok().into_iter().collect();
return match u64::from_str_radix(trimmed, 16) {
Ok(v) => smallvec![v],
Err(_) => SmallVec::new(),
};
}
let mut out = Vec::with_capacity(2);
let mut out: FingerprintForms = SmallVec::new();
if let Ok(v) = trimmed.parse::<u64>() {
out.push(v);
}
@ -72,7 +82,10 @@ fn parse_fingerprint(s: &str) -> Vec<u64> {
}
return out;
}
trimmed.parse::<u64>().ok().into_iter().collect()
match trimmed.parse::<u64>() {
Ok(v) => smallvec![v],
Err(_) => SmallVec::new(),
}
}
pub fn save_baseline(path: &Path, baseline: &BaselineFile) -> Result<()> {
@ -306,12 +319,12 @@ mod tests {
#[test]
fn parse_fingerprint_accepts_all_forms() {
let value: u64 = 0xfeed_beef_dade_f00d;
assert_eq!(parse_fingerprint(&format!("{:016x}", value)), vec![value]);
assert_eq!(parse_fingerprint(&format!("0x{:016x}", value)), vec![value]);
assert_eq!(parse_fingerprint(&format!("0X{:X}", value)), vec![value]);
assert_eq!(parse_fingerprint(&value.to_string()), vec![value]);
assert_eq!(parse_fingerprint(" 42 "), vec![42]);
assert_eq!(parse_fingerprint("0"), vec![0]);
assert_eq!(parse_fingerprint(&format!("{:016x}", value)).as_slice(), &[value]);
assert_eq!(parse_fingerprint(&format!("0x{:016x}", value)).as_slice(), &[value]);
assert_eq!(parse_fingerprint(&format!("0X{:X}", value)).as_slice(), &[value]);
assert_eq!(parse_fingerprint(&value.to_string()).as_slice(), &[value]);
assert_eq!(parse_fingerprint(" 42 ").as_slice(), &[42]);
assert_eq!(parse_fingerprint("0").as_slice(), &[0]);
assert!(parse_fingerprint("").is_empty());
assert!(parse_fingerprint("notahex").is_empty());
}

View file

@ -2,6 +2,27 @@ use anyhow::Result;
use assert_cmd::Command;
use serde_json::Value;
/// Ensure that none of the example secrets embedded in the built-in rule YAML
/// files validate as active credentials.
///
/// Kingfisher writes two JSON documents to stdout when `--format json` is used:
/// 1. A summary object (`{"findings": N, "successful_validations": M, ...}`)
/// emitted by the scanner runner for every scan.
/// 2. The full report envelope (`{"findings": [ ... ], "metadata": ...}`)
/// emitted by the JSON reporter when there is at least one finding to
/// report. With `--only-valid`, this envelope is omitted when no findings
/// validated successfully.
///
/// Kingfisher's exit code contract (see `determine_exit_code` in `src/main.rs`):
/// * 0 — no visible findings
/// * 200 — visible findings present, but none validated as active
/// * 205 — at least one validated (active) finding
///
/// This test passes as long as `successful_validations` is zero and no entry
/// in the optional findings envelope has validation status "active credential".
/// It is deliberately tolerant of exit code 200, of failed HTTP validations
/// (e.g. network unreachable in CI), and of the summary-only / envelope-only
/// stdout shapes.
#[test]
fn scan_rules_has_no_validated_findings() -> Result<()> {
let output = Command::new(assert_cmd::cargo::cargo_bin!("kingfisher"))
@ -16,67 +37,99 @@ fn scan_rules_has_no_validated_findings() -> Result<()> {
.output()?;
let stdout = String::from_utf8_lossy(&output.stdout);
let stderr = String::from_utf8_lossy(&output.stderr);
// Find the first '[' — start of array
let start = match stdout.find('[') {
Some(i) => i,
None => return Ok(()), // no array found
};
// Scan stdout for top-level JSON values. The stream contains the summary
// object followed optionally by the report envelope (both top-level
// objects or arrays, no wrapping). Walk through, parse each, and collect
// whichever shapes we find.
let mut summary: Option<Value> = None;
let mut envelope: Option<Value> = None;
let mut depth = 0usize;
let mut end = None;
for (i, ch) in stdout.char_indices().skip(start) {
match ch {
'[' => depth += 1,
']' => {
depth -= 1;
if depth == 0 {
end = Some(i);
break;
let bytes = stdout.as_bytes();
let mut idx = 0;
while idx < bytes.len() {
// Skip whitespace / stray non-JSON noise between documents.
while idx < bytes.len() && !matches!(bytes[idx], b'{' | b'[') {
idx += 1;
}
if idx >= bytes.len() {
break;
}
let mut de = serde_json::Deserializer::from_slice(&bytes[idx..]).into_iter::<Value>();
match de.next() {
Some(Ok(value)) => {
let consumed = de.byte_offset();
// Heuristic: the summary has a numeric "findings" field; the
// envelope has an array "findings" field.
if let Some(findings) = value.get("findings") {
if findings.is_array() && envelope.is_none() {
envelope = Some(value);
} else if findings.is_number() && summary.is_none() {
summary = Some(value);
}
}
idx += consumed.max(1);
}
_ => {}
Some(Err(_)) | None => break,
}
}
let json_array_str = match end {
Some(end_idx) => &stdout[start..=end_idx],
None => return Ok(()), // no matching close found
};
// Primary signal: the scanner summary's `successful_validations` counter.
let successful_validations = summary
.as_ref()
.and_then(|s| s.get("successful_validations"))
.and_then(|v| v.as_u64())
.unwrap_or(0);
if json_array_str.trim().is_empty() {
return Ok(());
}
let findings: Vec<Value> = serde_json::from_str(json_array_str)?;
let validated_rule_ids: Vec<String> = findings
.iter()
.filter_map(|finding| {
let status = finding["finding"]["validation"]["status"]
.as_str()
.unwrap_or("")
.to_ascii_lowercase();
if status == "active credential" {
Some(finding["rule"]["id"].as_str().unwrap_or("unknown").to_string())
} else {
None
// Secondary signal: any finding in the report envelope whose validation
// status indicates an active credential. This catches the case where the
// envelope is present (because something validated) and tells us which
// rule's example triggered it.
let mut validated_rule_ids: Vec<String> = Vec::new();
if let Some(env) = &envelope {
if let Some(findings) = env.get("findings").and_then(|v| v.as_array()) {
for finding in findings {
let status = finding
.pointer("/finding/validation/status")
.and_then(|v| v.as_str())
.unwrap_or("")
.to_ascii_lowercase();
if status.contains("active") && !status.contains("inactive") {
let id = finding
.pointer("/rule/id")
.and_then(|v| v.as_str())
.unwrap_or("unknown")
.to_string();
validated_rule_ids.push(id);
}
}
})
.collect();
}
}
assert!(
validated_rule_ids.is_empty(),
"Validated findings detected in rules: {}",
validated_rule_ids.join(", ")
successful_validations == 0 && validated_rule_ids.is_empty(),
"Validated findings detected in rules.\n successful_validations: {}\n active rule ids: {}\nstdout:\n{}\nstderr:\n{}",
successful_validations,
validated_rule_ids.join(", "),
stdout,
stderr,
);
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
panic!(
"kingfisher scan exited non-zero without validated findings in output.\nstdout:\n{}\nstderr:\n{}",
stdout, stderr
);
// Accept exit codes 0 (no findings) and 200 (findings but none validated).
// Anything else — in particular 205 (active validated findings) or a
// crash-style exit — is a real failure.
match output.status.code() {
Some(0) | Some(200) => Ok(()),
Some(code) => {
panic!(
"kingfisher scan exited with unexpected code {code}.\nstdout:\n{stdout}\nstderr:\n{stderr}",
);
}
None => {
panic!(
"kingfisher scan terminated without an exit code.\nstdout:\n{stdout}\nstderr:\n{stderr}",
);
}
}
Ok(())
}