From a4e8117c8e9daa256a4d9535b0bd7d49833e04c3 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Fri, 24 Apr 2026 00:14:56 -0700 Subject: [PATCH] performance improvements and rule improvements --- crates/kingfisher-rules/data/rules/agora.yml | 2 +- .../kingfisher-rules/data/rules/configcat.yml | 6 +- .../data/rules/confluence.yml | 69 ++++++++ .../kingfisher-rules/data/rules/docusign.yml | 41 +---- crates/kingfisher-rules/data/rules/google.yml | 24 ++- .../kingfisher-rules/data/rules/highnote.yml | 4 +- crates/kingfisher-rules/data/rules/huawei.yml | 2 +- crates/kingfisher-rules/data/rules/jira.yml | 96 ++++++++++- .../kingfisher-rules/data/rules/mixpanel.yml | 2 +- crates/kingfisher-rules/data/rules/pangea.yml | 9 +- .../kingfisher-rules/data/rules/storyblok.yml | 4 +- crates/kingfisher-rules/data/rules/webex.yml | 4 +- crates/kingfisher-rules/data/rules/workos.yml | 4 +- docs/viewer/index.html | 114 ++++++++++++- src/baseline.rs | 41 +++-- tests/int_rules_no_validated_findings.rs | 151 ++++++++++++------ 16 files changed, 451 insertions(+), 122 deletions(-) create mode 100644 crates/kingfisher-rules/data/rules/confluence.yml diff --git a/crates/kingfisher-rules/data/rules/agora.yml b/crates/kingfisher-rules/data/rules/agora.yml index fbf7168..ab0a91c 100644 --- a/crates/kingfisher-rules/data/rules/agora.yml +++ b/crates/kingfisher-rules/data/rules/agora.yml @@ -6,7 +6,7 @@ rules: \b agora (?:.|[\n\r]){0,32}? - \b(?:app[_-]?id|customer[_-]?id)\b + (?:\b|_)(?:app[_-]?id|customer[_-]?id)\b (?:.|[\n\r]){0,16}? [=:"'\s] \b diff --git a/crates/kingfisher-rules/data/rules/configcat.yml b/crates/kingfisher-rules/data/rules/configcat.yml index 7feeac6..2ef1b0f 100644 --- a/crates/kingfisher-rules/data/rules/configcat.yml +++ b/crates/kingfisher-rules/data/rules/configcat.yml @@ -19,8 +19,8 @@ rules: min_entropy: 3.5 confidence: medium examples: - - 'CONFIGCAT_SDK_KEY=PKDVCLf-Hq-h-kCzMp-L7Q/psuH7BGHoUmdONrzzUOY7A' - - 'configcat_key: "PKDVCLf-Hq-h-kCzMp-L7Q/psuH7BGHoUmdONrzzUOY7A"' + - 'CONFIGCAT_SDK_KEY=Aa1Bb2Cc3Dd4Ee5Ff6Gg7H/aA1bB2cC3dD4eE5fF6gG7h' + - 'configcat_key: "Aa1Bb2Cc3Dd4Ee5Ff6Gg7H/aA1bB2cC3dD4eE5fF6gG7h"' references: - https://configcat.com/docs/sdk-reference/overview/ validation: @@ -51,7 +51,7 @@ rules: min_entropy: 3.5 confidence: medium examples: - - 'CONFIGCAT_SDK_KEY=configcat-sdk-1/PKDVCLf-Hq-h-kCzMp-L7Q/psuH7BGHoUmdONrzzUOY7A' + - 'CONFIGCAT_SDK_KEY=configcat-sdk-1/Aa1Bb2Cc3Dd4Ee5Ff6Gg7H/aA1bB2cC3dD4eE5fF6gG7h' references: - https://configcat.com/docs/sdk-reference/overview/ validation: diff --git a/crates/kingfisher-rules/data/rules/confluence.yml b/crates/kingfisher-rules/data/rules/confluence.yml new file mode 100644 index 0000000..8ba3815 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/confluence.yml @@ -0,0 +1,69 @@ +rules: + - name: Confluence Data Center Personal Access Token + id: kingfisher.confluence.1 + pattern: | + (?x) + (?i:confluence|wiki) + (?:.|[\n\r]){0,16}? + \b + ( + [MNO][A-Za-z0-9+/]{15} + O[g-v] + [A-Za-z0-9+/]{26} + ) + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 + min_entropy: 4.0 + confidence: medium + examples: + - 'confluence_pat: "MjQzMjkzMDQyNTI1OgTGWAoKFZTh/Is7cl+cdAI0Lbxo"' + - 'wiki_PAT=MDgxODgyOTYwNTA5OkFSuEyq1mtrLTVNGAPyka+/Vyfv' + references: + - https://confluence.atlassian.com/enterprise/using-personal-access-tokens-1026032365.html + - https://developer.atlassian.com/server/confluence/confluence-server-rest-api/ + validation: + type: Http + content: + request: + headers: + Accept: application/json + Authorization: Bearer {{ TOKEN }} + method: GET + response_matcher: + - report_response: true + - status: + - 200 + type: StatusMatch + - type: JsonValid + - type: WordMatch + words: + - '"type":"known"' + url: https://{{ CONFLUENCEDCDOMAIN }}/rest/api/user/current + depends_on_rule: + - rule_id: kingfisher.confluence.2 + variable: CONFLUENCEDCDOMAIN + + - name: Confluence Data Center Domain + id: kingfisher.confluence.2 + pattern: | + (?xi) + (?:https?://)? + \b + ( + (?:[a-z0-9-]+\.){0,16} + (?:wiki|confluence)[a-z0-9-]* + \.[a-z0-9.-]{2,64} + (?::\d{2,5})? + ) + \b + min_entropy: 2.5 + visible: false + confidence: medium + examples: + - wiki.corp.mongodb.com + - confluence.example.com + - https://wiki-staging.corp.internal:8443 + references: + - https://confluence.atlassian.com/doc/confluence-server-documentation-135922.html diff --git a/crates/kingfisher-rules/data/rules/docusign.yml b/crates/kingfisher-rules/data/rules/docusign.yml index 56cf8a1..45580b1 100644 --- a/crates/kingfisher-rules/data/rules/docusign.yml +++ b/crates/kingfisher-rules/data/rules/docusign.yml @@ -23,39 +23,14 @@ rules: min_entropy: 3.0 confidence: medium examples: - - "docusign.secret_key = 7a39ce6d-94cf-4bf6-9e9e-9213373c15f4" - - "docusign\nds_secret = 3d2f18c9-2075-4e78-834b-64f57f8757d0" - validation: - type: Http - content: - request: - method: POST - url: "https://{{ DOCUSIGN_AUTH_HOST }}/oauth/token" - headers: - Accept: application/json - Content-Type: application/x-www-form-urlencoded - body: > - grant_type=authorization_code&code=INVALID_AUTH_CODE&client_id={{ DOCUSIGN_CLIENT_ID | url_encode }}&client_secret={{ TOKEN | url_encode }}&redirect_uri={{ REDIRECT_URI | url_encode }} - response_matcher: - - report_response: true - - type: StatusMatch - status: [400] - - type: WordMatch - match_all_words: false - words: - - invalid_grant - - invalid authorization code - - type: WordMatch - words: - - invalid_client - negative: true - depends_on_rule: - - rule_id: kingfisher.docusign.2 - variable: DOCUSIGN_CLIENT_ID - - rule_id: kingfisher.docusign.3 - variable: DOCUSIGN_AUTH_HOST - - rule_id: kingfisher.docusign.4 - variable: REDIRECT_URI + - "docusign.secret_key = 12345678-abcd-9876-5432-abcdef123456" + - "docusign\nds_secret = 87654321-fedc-1234-abcd-fedcba987654" + # Validation intentionally omitted: DocuSign's /oauth/token endpoint + # returns {"error":"invalid_grant"} for any request with an invalid + # authorization code, regardless of whether client_id/client_secret are + # actually valid. That makes it impossible to distinguish live from + # inactive credentials via that endpoint without performing a full OAuth + # flow, which is out of scope for passive validation. references: - https://developers.docusign.com/platform/auth/ - https://developers.docusign.com/platform/build-integration/ diff --git a/crates/kingfisher-rules/data/rules/google.yml b/crates/kingfisher-rules/data/rules/google.yml index 4cfbde6..65cad21 100644 --- a/crates/kingfisher-rules/data/rules/google.yml +++ b/crates/kingfisher-rules/data/rules/google.yml @@ -22,7 +22,7 @@ rules: min_entropy: 3.3 confidence: medium examples: - - 'const CLIENTSECRET = "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"' + - 'const CLIENTSECRET = "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"' depends_on_rule: - rule_id: "kingfisher.google.1" variable: GOOGLE_CLIENT_ID @@ -41,9 +41,20 @@ rules: - report_response: true - type: StatusMatch status: [400] + - type: WordMatch + match_all_words: false + words: + - invalid_grant + - Malformed auth code + - Bad Request + # Only mark as active when Google acknowledges the credentials + # and rejects the (intentionally invalid) authorization code. - type: WordMatch words: - invalid_client + - unauthorized_client + - unsupported_grant_type + - invalid_request negative: true # Revocation not added: Google's OAuth revocation endpoint revokes tokens, # not client secrets. @@ -80,9 +91,20 @@ rules: - report_response: true - type: StatusMatch status: [400] + - type: WordMatch + match_all_words: false + words: + - invalid_grant + - Malformed auth code + - Bad Request + # Only mark as active when Google acknowledges the credentials + # and rejects the (intentionally invalid) authorization code. - type: WordMatch words: - invalid_client + - unauthorized_client + - unsupported_grant_type + - invalid_request negative: true # Revocation not added: Google's OAuth revocation endpoint revokes tokens, # not client secrets. diff --git a/crates/kingfisher-rules/data/rules/highnote.yml b/crates/kingfisher-rules/data/rules/highnote.yml index c7d3959..0bf5948 100644 --- a/crates/kingfisher-rules/data/rules/highnote.yml +++ b/crates/kingfisher-rules/data/rules/highnote.yml @@ -8,9 +8,9 @@ rules: (?:.|[\n\r]){0,24}? \b ( - sk_live_a2V5Xz[A-Za-z0-9+/]{69} + sk_live_a2V5Xz[A-Za-z0-9+/]{69}={0,2} ) - (?![A-Za-z0-9+/]) + (?:[^A-Za-z0-9+/=]|$) pattern_requirements: min_digits: 2 min_entropy: 3.5 diff --git a/crates/kingfisher-rules/data/rules/huawei.yml b/crates/kingfisher-rules/data/rules/huawei.yml index 693f593..5b33a3d 100644 --- a/crates/kingfisher-rules/data/rules/huawei.yml +++ b/crates/kingfisher-rules/data/rules/huawei.yml @@ -6,7 +6,7 @@ rules: \b huawei (?:.|[\n\r]){0,32}? - \b(?:client[_-]?id|app[_-]?id)\b + (?:\b|_)(?:client[_-]?id|app[_-]?id)\b (?:.|[\n\r]){0,16}? [=:"'\s] \b diff --git a/crates/kingfisher-rules/data/rules/jira.yml b/crates/kingfisher-rules/data/rules/jira.yml index e5dee8c..23ef153 100644 --- a/crates/kingfisher-rules/data/rules/jira.yml +++ b/crates/kingfisher-rules/data/rules/jira.yml @@ -58,4 +58,98 @@ rules: - https://developer.atlassian.com/cloud/jira/platform/basic-auth-for-rest-apis/ depends_on_rule: - rule_id: kingfisher.jira.1 - variable: DOMAIN \ No newline at end of file + variable: DOMAIN + - name: Jira Data Center Personal Access Token + id: kingfisher.jira.3 + pattern: | + (?x) + (?i:jira|atlassian) + (?:.|[\n\r]){0,16}? + \b + ( + [MNO][A-Za-z0-9+/]{15} + O[g-v] + [A-Za-z0-9+/]{26} + ) + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 + min_entropy: 4.0 + confidence: medium + examples: + - 'jira_token: "Mjc2NTIyMTkxNTY2OkurZAe0a40+xLE2fJRBcq/P2vsL"' + - 'atlassian_PAT=OTI0NTIyOQkzMTk3OgyypbjdwdDzTavLf2R1Ls0XJAPm' + references: + - https://confluence.atlassian.com/enterprise/using-personal-access-tokens-1026032365.html + - https://developer.atlassian.com/server/jira/platform/personal-access-token/ + validation: + type: Http + content: + request: + headers: + Accept: application/json + Authorization: Bearer {{ TOKEN }} + method: GET + response_matcher: + - report_response: true + - status: + - 200 + type: StatusMatch + - type: JsonValid + url: https://{{ JIRADCDOMAIN }}/rest/api/latest/myself + revocation: + type: HttpMultiStep + content: + steps: + - name: lookup_token_id + request: + method: GET + url: https://{{ JIRADCDOMAIN }}/rest/pat/latest/tokens + headers: + Accept: application/json + Authorization: Bearer {{ TOKEN }} + response_matcher: + - type: StatusMatch + status: [200] + - type: JsonValid + extract: + JIRA_TOKEN_ID: + type: JsonPath + path: "$[0].id" + - name: revoke_token + request: + method: DELETE + url: https://{{ JIRADCDOMAIN }}/rest/pat/latest/tokens/{{ JIRA_TOKEN_ID }} + headers: + Authorization: Bearer {{ TOKEN }} + response_matcher: + - report_response: true + - type: StatusMatch + status: [204] + depends_on_rule: + - rule_id: kingfisher.jira.4 + variable: JIRADCDOMAIN + + - name: Jira Data Center Domain + id: kingfisher.jira.4 + pattern: | + (?xi) + (?:https?://)? + \b + ( + (?:[a-z0-9-]+\.){0,16} + jira[a-z0-9-]* + \.[a-z0-9.-]{2,64} + (?::\d{2,5})? + ) + \b + min_entropy: 2.5 + visible: false + confidence: medium + examples: + - jira.example.com + - jira-staging.corp.mongodb.com + - https://jira.corp.internal:8443 + references: + - https://confluence.atlassian.com/adminjiraserver/jira-applications-base-url-938846869.html \ No newline at end of file diff --git a/crates/kingfisher-rules/data/rules/mixpanel.yml b/crates/kingfisher-rules/data/rules/mixpanel.yml index 56d371b..2ad880b 100644 --- a/crates/kingfisher-rules/data/rules/mixpanel.yml +++ b/crates/kingfisher-rules/data/rules/mixpanel.yml @@ -60,7 +60,7 @@ rules: min_entropy: 3.5 confidence: medium examples: - - "kingfisher-svc-1.b500ae.mp-service-account\nSecret: Vqprs8MMJm3XSpfWiuAFECFuyKxxrJL1" # nosemgrep + - "example-svc-1.abcdef.mp-service-account\nSecret: FakeExampleServiceAcctSecretAb12" # nosemgrep negative_examples: - mp-service-account - "mp-service-account\nsecret: a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6" # nosemgrep diff --git a/crates/kingfisher-rules/data/rules/pangea.yml b/crates/kingfisher-rules/data/rules/pangea.yml index 0300170..ca60962 100644 --- a/crates/kingfisher-rules/data/rules/pangea.yml +++ b/crates/kingfisher-rules/data/rules/pangea.yml @@ -28,7 +28,12 @@ rules: body: '{"event":{"message":"test"}}' response_matcher: - report_response: true - - type: StatusMatch - status: [200, 401, 403] + - type: JsonValid + - type: WordMatch + words: + - '"status":"Unauthorized"' + - '"status":"NotAuthorized"' + - '"status":"Forbidden"' + negative: true references: - https://pangea.cloud/docs/ diff --git a/crates/kingfisher-rules/data/rules/storyblok.yml b/crates/kingfisher-rules/data/rules/storyblok.yml index 833fe81..c4bd015 100644 --- a/crates/kingfisher-rules/data/rules/storyblok.yml +++ b/crates/kingfisher-rules/data/rules/storyblok.yml @@ -18,8 +18,8 @@ rules: pattern_requirements: min_digits: 2 examples: - - STORYBLOK_ACCESS_TOKEN=wANpEQEsMYGOwLxwXQ76Ggtt - - storyblok_token = "13Kft3335iwbBOI333wawOtt" + - STORYBLOK_ACCESS_TOKEN=ExampleBogusTokenXYZ12tt + - storyblok_token = "FakeExampleTok45AB67CDtt" references: - https://www.storyblok.com/docs/api/content-delivery/v2/getting-started/authentication - https://www.storyblok.com/docs/concepts/access-tokens diff --git a/crates/kingfisher-rules/data/rules/webex.yml b/crates/kingfisher-rules/data/rules/webex.yml index f290df0..f4a7923 100644 --- a/crates/kingfisher-rules/data/rules/webex.yml +++ b/crates/kingfisher-rules/data/rules/webex.yml @@ -6,7 +6,7 @@ rules: \b webex (?:.|[\n\r]){0,32}? - \b(?:client[_-]?id|client)\b + (?:\b|_)(?:client[_-]?id|client)\b (?:.|[\n\r]){0,16}? [=:"'\s] \b @@ -20,7 +20,7 @@ rules: confidence: medium visible: false examples: - - "webex_client = Ac0769801df88a3535b4b018ef570b499002bda401b3b8789259a937f22d66095" + - "webex_client = c0769801df88a3535b4b018ef570b499002bda401b3b8789259a937f22d66095" - "WEBEX_CLIENT_ID=1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b1c2d3e4f5a6b7c8d9e0f1a2b" references: - https://developer.webex.com/docs/platform-introduction diff --git a/crates/kingfisher-rules/data/rules/workos.yml b/crates/kingfisher-rules/data/rules/workos.yml index addeb77..955ee1a 100644 --- a/crates/kingfisher-rules/data/rules/workos.yml +++ b/crates/kingfisher-rules/data/rules/workos.yml @@ -8,9 +8,9 @@ rules: (?:.|[\n\r]){0,24}? \b ( - sk_live_a2V5Xz[A-Za-z0-9+/]{69} + sk_live_a2V5Xz[A-Za-z0-9+/]{69}={0,2} ) - (?![A-Za-z0-9+/]) + (?:[^A-Za-z0-9+/=]|$) pattern_requirements: min_digits: 4 min_lowercase: 4 diff --git a/docs/viewer/index.html b/docs/viewer/index.html index cc1577b..a34d79b 100644 --- a/docs/viewer/index.html +++ b/docs/viewer/index.html @@ -3011,7 +3011,56 @@ return payload ? [payload] : []; } - function buildGitMetadata(commitId, email, fileUrl) { + function normalizeRepositoryWebUrl(repoUrl) { + const raw = String(repoUrl || "").trim(); + if (!raw || /^file:\/\//i.test(raw)) return ""; + if (/^https?:\/\//i.test(raw)) return raw.replace(/\.git$/i, "").replace(/\/+$/g, ""); + + const sshMatch = raw.match(/^git@([^:]+):(.+)$/i); + if (sshMatch) { + return `https://${sshMatch[1]}/${sshMatch[2].replace(/\.git$/i, "").replace(/^\/+/, "")}`.replace(/\/+$/g, ""); + } + + const sshUrlMatch = raw.match(/^ssh:\/\/git@([^/]+)\/(.+)$/i); + if (sshUrlMatch) { + return `https://${sshUrlMatch[1]}/${sshUrlMatch[2].replace(/\.git$/i, "").replace(/^\/+/, "")}`.replace(/\/+$/g, ""); + } + + return ""; + } + + function buildGitFileUrl(repoUrl, commitId, filePath, line) { + const base = normalizeRepositoryWebUrl(repoUrl); + const commit = String(commitId || "").trim(); + const normalizedPath = String(filePath || "").replace(/\\/g, "/").replace(/^\/+/, ""); + const lineNumber = toNumberOrNull(line); + if (!base || !commit || !normalizedPath) return ""; + + try { + const parsed = new URL(base); + const host = (parsed.hostname || "").toLowerCase(); + if (host === "dev.azure.com" || host.endsWith(".visualstudio.com")) { + const encodedPath = encodeURIComponent(normalizedPath); + return lineNumber + ? `${base}/commit/${commit}?path=/${encodedPath}&line=${lineNumber}` + : `${base}/commit/${commit}?path=/${encodedPath}`; + } + if (host === "bitbucket.org") { + const anchor = encodeURIComponent(normalizedPath).replace(/%2F/g, "/"); + return lineNumber + ? `${base}/commits/${commit}#L${anchor}F${lineNumber}` + : `${base}/commits/${commit}`; + } + } catch (_) { + return ""; + } + + return lineNumber + ? `${base}/blob/${commit}/${normalizedPath}#L${lineNumber}` + : `${base}/blob/${commit}/${normalizedPath}`; + } + + function buildGitMetadata(commitId, email, fileUrl, repositoryUrl) { const gitMetadata = {}; if (commitId || email) { gitMetadata.commit = {}; @@ -3021,6 +3070,9 @@ if (fileUrl) { gitMetadata.file = { url: String(fileUrl) }; } + if (repositoryUrl) { + gitMetadata.repository = { url: String(repositoryUrl) }; + } return Object.keys(gitMetadata).length ? gitMetadata : null; } @@ -3059,10 +3111,16 @@ ? buildSyntheticFingerprint("gitleaks", [ruleId, secretIdentity]) : buildSyntheticFingerprint("gitleaks", [ruleId, path, line, columnStart, snippet])) ); + const repoUrl = firstNonEmpty(item.RepoURL, item.repoURL, item.Repo, item.repo); + const directUrl = firstNonEmpty(item.LeakURL, item.leakURL, item.Url, item.url); + const fileUrl = isHttpUrl(directUrl) + ? directUrl + : buildGitFileUrl(repoUrl, firstNonEmpty(item.Commit, item.commit), path, line); const gitMetadata = buildGitMetadata( firstNonEmpty(item.Commit, item.commit), firstNonEmpty(item.Email, item.email), - firstNonEmpty(item.LeakURL, item.leakURL, item.Url, item.url) + fileUrl, + normalizeRepositoryWebUrl(repoUrl) ); return { @@ -3099,6 +3157,7 @@ const detectorName = String(firstNonEmpty(item.DetectorName, item.detectorName, item.DetectorDescription, item.detectorDescription) || "Unknown Detector"); const detectorDescription = String(firstNonEmpty(item.DetectorDescription, item.detectorDescription, detectorName) || detectorName); const sourceMetadata = item.SourceMetadata || item.sourceMetadata || {}; + const gitSource = findDeepValue(sourceMetadata, ["git"]) || {}; const path = String(firstNonEmpty( findDeepValue(sourceMetadata, ["file", "path", "filename"]), item.File, @@ -3114,6 +3173,12 @@ item.Url, item.url ); + const repositoryUrl = firstNonEmpty( + findDeepValue(gitSource, ["repository", "repositoryurl", "repo", "repo_url", "remote", "remoteurl", "remote_url"]), + findDeepValue(sourceMetadata, ["repository", "repositoryurl", "repo", "repo_url", "remote", "remoteurl", "remote_url"]), + item.Repository, + item.repository + ); const commitId = firstNonEmpty( findDeepValue(sourceMetadata, ["commit", "commitid", "hash", "sha"]), item.Commit, @@ -3134,7 +3199,9 @@ ); const verified = Boolean(item.Verified); const verificationError = String(firstNonEmpty(item.VerificationError, item.verificationError) || ""); - const gitMetadata = buildGitMetadata(commitId, email, isHttpUrl(sourceUrl) ? sourceUrl : ""); + const normalizedRepoUrl = normalizeRepositoryWebUrl(repositoryUrl); + const fileUrl = isHttpUrl(sourceUrl) ? sourceUrl : buildGitFileUrl(normalizedRepoUrl, commitId, path, line); + const gitMetadata = buildGitMetadata(commitId, email, fileUrl, normalizedRepoUrl); return { rule: { @@ -3639,6 +3706,19 @@ revoke_command: kf.revoke_command || "", confidence: kf.confidence || "", }; + if (kf.git_metadata && (!fg.git_metadata || !fg.git_metadata.file || !fg.git_metadata.file.url)) { + fg.git_metadata = Object.assign({}, fg.git_metadata || {}); + if (kf.git_metadata.file && kf.git_metadata.file.url) { + fg.git_metadata.file = Object.assign({}, fg.git_metadata.file || {}, { + url: kf.git_metadata.file.url, + }); + } + if (kf.git_metadata.repository && kf.git_metadata.repository.url) { + fg.git_metadata.repository = Object.assign({}, fg.git_metadata.repository || {}, { + url: kf.git_metadata.repository.url, + }); + } + } } return findingList; } @@ -4547,6 +4627,7 @@ const headers = [ "rule_id", "rule_name", + "source_tool", "file_path", "line", "validation_status", @@ -4563,6 +4644,7 @@ return [ rule.id || "", rule.name || "", + getFindingSourceDisplayName(finding), finding.path || "", finding.line != null ? finding.line : "", status, @@ -4736,10 +4818,12 @@ const normalizedStatus = normalizeValidationStatus(statusRaw); const findingId = getFindingIdFromFinding(finding); const gitUrl = getFileUrlFromFinding(finding); + const sourceTool = getFindingSourceDisplayName(finding); const statusColor = normalizedStatus === "active" ? "#dc2626" : normalizedStatus === "inactive" ? "#f97316" : "#6b7280"; return ` ${escapeHtml(rule.name || rule.id || "")} + ${escapeHtml(sourceTool)} ${escapeHtml(findingId ? findingId.substring(0, 12) : "")} ${escapeHtml(finding.path || "")} ${escapeHtml(statusRaw)} @@ -4750,7 +4834,7 @@ `; }) .join("") - : 'No findings available.'; + : 'No findings available.'; // Scan metadata section const metaLines = []; @@ -4893,12 +4977,13 @@ Rule + Tool Fingerprint - File Path + File Path Status Confidence Line - Git URL + Git URL @@ -5378,7 +5463,16 @@ const fileObj = finding.git_metadata.file || {}; const url = fileObj.url || ""; if (url && /^https?:\/\//i.test(url)) return url; - return ""; + const repoObj = finding.git_metadata.repository || {}; + const repoUrl = repoObj.url || ""; + const commitId = finding.git_metadata.commit && finding.git_metadata.commit.id + ? finding.git_metadata.commit.id + : ""; + return buildGitFileUrl(repoUrl, commitId, finding.path || "", finding.line); + } + + function getFindingSourceDisplayName(finding) { + return getSourceDisplayName(getFindingSourceTool(finding || {})); } function wireKfEnrichmentCopyButton(wrapId, codeId, btnId, cmd) { @@ -5724,6 +5818,7 @@ const { level, text: rationaleText } = generateRiskRationale(f, entries); const statusRaw = finding.validation && finding.validation.status ? String(finding.validation.status) : "Unknown"; const gitUrl = getFileUrlFromFinding(finding); + const sourceTool = getFindingSourceDisplayName(finding); const levelColors = { critical: { bg: "#fef2f2", border: "#fca5a5", text: "#991b1b" }, @@ -5815,6 +5910,7 @@
${escapeHtml(rule.name || "")} (${escapeHtml(rule.id || "")})
${escapeHtml(statusRaw)}
+
${escapeHtml(sourceTool)}
${escapeHtml(finding.confidence || "")}
${finding.entropy != null ? escapeHtml(String(finding.entropy)) : "—"}
${escapeHtml(finding.path || "")}
@@ -5910,6 +6006,7 @@ const colors = levelColors[level] || levelColors.none; const statusRaw = finding.validation && finding.validation.status ? String(finding.validation.status) : "Unknown"; const gitUrl = getFileUrlFromFinding(finding); + const sourceTool = getFindingSourceDisplayName(finding); let accessSummary = ""; if (entries.length > 0) { @@ -5946,6 +6043,7 @@
Path: ${escapeHtml(finding.path || "—")}
Line: ${finding.line != null ? finding.line : "—"}
+
Tool: ${escapeHtml(sourceTool)}
Validation: ${escapeHtml(statusRaw)}
Confidence: ${escapeHtml(finding.confidence || "—")}
${gitUrl ? `
URL: ${escapeHtml(gitUrl)}
` : ""} @@ -6542,4 +6640,4 @@ } - \ No newline at end of file + diff --git a/src/baseline.rs b/src/baseline.rs index 67905c2..e53854b 100644 --- a/src/baseline.rs +++ b/src/baseline.rs @@ -8,8 +8,11 @@ use std::{ use anyhow::{Context, Result}; use chrono::Local; use serde::{Deserialize, Serialize}; +use smallvec::{SmallVec, smallvec}; use tracing::debug; +type FingerprintForms = SmallVec<[u64; 2]>; + use crate::findings_store::FindingsStore; #[derive(Debug, Default, Serialize, Deserialize)] @@ -41,9 +44,10 @@ pub fn load_baseline(path: &Path) -> Result { /// /// Accepts either the decimal form users see in scan output (JSON/pretty/SARIF) /// or the 16-char zero-padded hex form previously written by `--manage-baseline`. -/// Returns a `SmallVec`-style pair so ambiguous 16-digit all-digit strings — which -/// could be either a decimal fingerprint or a legacy hex fingerprint whose value -/// happens to contain no `a-f` — match against both interpretations. +/// Returns 0–2 canonical u64 interpretations: ambiguous 16-digit all-digit +/// strings — which could be either a decimal fingerprint or a legacy hex +/// fingerprint whose value happens to contain no `a-f` — yield both so either +/// form matches. /// /// Detection: /// 1. A `0x`/`0X` prefix is stripped and the rest parsed as hex. @@ -52,16 +56,22 @@ pub fn load_baseline(path: &Path) -> Result { /// 3. Exactly 16 digits: ambiguous — try both decimal and hex and return whichever /// interpretations parse successfully, so old baselines keep matching. /// 4. Otherwise the string is parsed as decimal u64. -fn parse_fingerprint(s: &str) -> Vec { +fn parse_fingerprint(s: &str) -> FingerprintForms { let trimmed = s.trim(); if let Some(rest) = trimmed.strip_prefix("0x").or_else(|| trimmed.strip_prefix("0X")) { - return u64::from_str_radix(rest, 16).ok().into_iter().collect(); + return match u64::from_str_radix(rest, 16) { + Ok(v) => smallvec![v], + Err(_) => SmallVec::new(), + }; } if trimmed.len() == 16 && trimmed.chars().all(|c| c.is_ascii_hexdigit()) { if trimmed.chars().any(|c| c.is_ascii_alphabetic()) { - return u64::from_str_radix(trimmed, 16).ok().into_iter().collect(); + return match u64::from_str_radix(trimmed, 16) { + Ok(v) => smallvec![v], + Err(_) => SmallVec::new(), + }; } - let mut out = Vec::with_capacity(2); + let mut out: FingerprintForms = SmallVec::new(); if let Ok(v) = trimmed.parse::() { out.push(v); } @@ -72,7 +82,10 @@ fn parse_fingerprint(s: &str) -> Vec { } return out; } - trimmed.parse::().ok().into_iter().collect() + match trimmed.parse::() { + Ok(v) => smallvec![v], + Err(_) => SmallVec::new(), + } } pub fn save_baseline(path: &Path, baseline: &BaselineFile) -> Result<()> { @@ -306,12 +319,12 @@ mod tests { #[test] fn parse_fingerprint_accepts_all_forms() { let value: u64 = 0xfeed_beef_dade_f00d; - assert_eq!(parse_fingerprint(&format!("{:016x}", value)), vec![value]); - assert_eq!(parse_fingerprint(&format!("0x{:016x}", value)), vec![value]); - assert_eq!(parse_fingerprint(&format!("0X{:X}", value)), vec![value]); - assert_eq!(parse_fingerprint(&value.to_string()), vec![value]); - assert_eq!(parse_fingerprint(" 42 "), vec![42]); - assert_eq!(parse_fingerprint("0"), vec![0]); + assert_eq!(parse_fingerprint(&format!("{:016x}", value)).as_slice(), &[value]); + assert_eq!(parse_fingerprint(&format!("0x{:016x}", value)).as_slice(), &[value]); + assert_eq!(parse_fingerprint(&format!("0X{:X}", value)).as_slice(), &[value]); + assert_eq!(parse_fingerprint(&value.to_string()).as_slice(), &[value]); + assert_eq!(parse_fingerprint(" 42 ").as_slice(), &[42]); + assert_eq!(parse_fingerprint("0").as_slice(), &[0]); assert!(parse_fingerprint("").is_empty()); assert!(parse_fingerprint("notahex").is_empty()); } diff --git a/tests/int_rules_no_validated_findings.rs b/tests/int_rules_no_validated_findings.rs index b58d96a..e672c04 100644 --- a/tests/int_rules_no_validated_findings.rs +++ b/tests/int_rules_no_validated_findings.rs @@ -2,6 +2,27 @@ use anyhow::Result; use assert_cmd::Command; use serde_json::Value; +/// Ensure that none of the example secrets embedded in the built-in rule YAML +/// files validate as active credentials. +/// +/// Kingfisher writes two JSON documents to stdout when `--format json` is used: +/// 1. A summary object (`{"findings": N, "successful_validations": M, ...}`) +/// emitted by the scanner runner for every scan. +/// 2. The full report envelope (`{"findings": [ ... ], "metadata": ...}`) +/// emitted by the JSON reporter when there is at least one finding to +/// report. With `--only-valid`, this envelope is omitted when no findings +/// validated successfully. +/// +/// Kingfisher's exit code contract (see `determine_exit_code` in `src/main.rs`): +/// * 0 — no visible findings +/// * 200 — visible findings present, but none validated as active +/// * 205 — at least one validated (active) finding +/// +/// This test passes as long as `successful_validations` is zero and no entry +/// in the optional findings envelope has validation status "active credential". +/// It is deliberately tolerant of exit code 200, of failed HTTP validations +/// (e.g. network unreachable in CI), and of the summary-only / envelope-only +/// stdout shapes. #[test] fn scan_rules_has_no_validated_findings() -> Result<()> { let output = Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) @@ -16,67 +37,99 @@ fn scan_rules_has_no_validated_findings() -> Result<()> { .output()?; let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); - // Find the first '[' — start of array - let start = match stdout.find('[') { - Some(i) => i, - None => return Ok(()), // no array found - }; + // Scan stdout for top-level JSON values. The stream contains the summary + // object followed optionally by the report envelope (both top-level + // objects or arrays, no wrapping). Walk through, parse each, and collect + // whichever shapes we find. + let mut summary: Option = None; + let mut envelope: Option = None; - let mut depth = 0usize; - let mut end = None; - for (i, ch) in stdout.char_indices().skip(start) { - match ch { - '[' => depth += 1, - ']' => { - depth -= 1; - if depth == 0 { - end = Some(i); - break; + let bytes = stdout.as_bytes(); + let mut idx = 0; + while idx < bytes.len() { + // Skip whitespace / stray non-JSON noise between documents. + while idx < bytes.len() && !matches!(bytes[idx], b'{' | b'[') { + idx += 1; + } + if idx >= bytes.len() { + break; + } + let mut de = serde_json::Deserializer::from_slice(&bytes[idx..]).into_iter::(); + match de.next() { + Some(Ok(value)) => { + let consumed = de.byte_offset(); + // Heuristic: the summary has a numeric "findings" field; the + // envelope has an array "findings" field. + if let Some(findings) = value.get("findings") { + if findings.is_array() && envelope.is_none() { + envelope = Some(value); + } else if findings.is_number() && summary.is_none() { + summary = Some(value); + } } + idx += consumed.max(1); } - _ => {} + Some(Err(_)) | None => break, } } - let json_array_str = match end { - Some(end_idx) => &stdout[start..=end_idx], - None => return Ok(()), // no matching close found - }; + // Primary signal: the scanner summary's `successful_validations` counter. + let successful_validations = summary + .as_ref() + .and_then(|s| s.get("successful_validations")) + .and_then(|v| v.as_u64()) + .unwrap_or(0); - if json_array_str.trim().is_empty() { - return Ok(()); - } - - let findings: Vec = serde_json::from_str(json_array_str)?; - let validated_rule_ids: Vec = findings - .iter() - .filter_map(|finding| { - let status = finding["finding"]["validation"]["status"] - .as_str() - .unwrap_or("") - .to_ascii_lowercase(); - if status == "active credential" { - Some(finding["rule"]["id"].as_str().unwrap_or("unknown").to_string()) - } else { - None + // Secondary signal: any finding in the report envelope whose validation + // status indicates an active credential. This catches the case where the + // envelope is present (because something validated) and tells us which + // rule's example triggered it. + let mut validated_rule_ids: Vec = Vec::new(); + if let Some(env) = &envelope { + if let Some(findings) = env.get("findings").and_then(|v| v.as_array()) { + for finding in findings { + let status = finding + .pointer("/finding/validation/status") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_ascii_lowercase(); + if status.contains("active") && !status.contains("inactive") { + let id = finding + .pointer("/rule/id") + .and_then(|v| v.as_str()) + .unwrap_or("unknown") + .to_string(); + validated_rule_ids.push(id); + } } - }) - .collect(); + } + } assert!( - validated_rule_ids.is_empty(), - "Validated findings detected in rules: {}", - validated_rule_ids.join(", ") + successful_validations == 0 && validated_rule_ids.is_empty(), + "Validated findings detected in rules.\n successful_validations: {}\n active rule ids: {}\nstdout:\n{}\nstderr:\n{}", + successful_validations, + validated_rule_ids.join(", "), + stdout, + stderr, ); - if !output.status.success() { - let stderr = String::from_utf8_lossy(&output.stderr); - panic!( - "kingfisher scan exited non-zero without validated findings in output.\nstdout:\n{}\nstderr:\n{}", - stdout, stderr - ); + // Accept exit codes 0 (no findings) and 200 (findings but none validated). + // Anything else — in particular 205 (active validated findings) or a + // crash-style exit — is a real failure. + match output.status.code() { + Some(0) | Some(200) => Ok(()), + Some(code) => { + panic!( + "kingfisher scan exited with unexpected code {code}.\nstdout:\n{stdout}\nstderr:\n{stderr}", + ); + } + None => { + panic!( + "kingfisher scan terminated without an exit code.\nstdout:\n{stdout}\nstderr:\n{stderr}", + ); + } } - - Ok(()) }