- Fixed kingfisher scan so that providing --branch without --since-commit now diffs the branch against the empty tree and scans every commit reachable from that branch.

- Added rules for meraki, duffel, finnhub, frameio, freshbooks, gitter, infracost, launchdarkly, lob, maxmind, messagebird, nytimes, prefect, scalingo, sendinblue, sentry, shippo, twitch, typeform
This commit is contained in:
Mick Grove 2025-10-20 18:23:12 -07:00
commit 122885199d
25 changed files with 900 additions and 96 deletions

View file

@ -2,6 +2,10 @@
All notable changes to this project will be documented in this file.
## [v1.59.0]
- Fixed `kingfisher scan` so that providing `--branch` without `--since-commit` now diffs the branch against the empty tree and scans every commit reachable from that branch.
- Added rules for meraki, duffel, finnhub, frameio, freshbooks, gitter, infracost, launchdarkly, lob, maxmind, messagebird, nytimes, prefect, scalingo, sendinblue, sentry, shippo, twitch, typeform
-
## [v1.58.0]
- Added first-class Hugging Face scanning support, including CLI enumeration, token authentication, and integration with remote scans.
- Condensed GitError formatting to report the exit status and the first informative lines from stdout/stderr, producing concise git clone failure logs.

View file

@ -10,7 +10,7 @@ publish = false
[package]
name = "kingfisher"
version = "1.58.0"
version = "1.59.0"
description = "MongoDB's blazingly fast and accurate secret scanning and validation tool"
edition.workspace = true
rust-version.workspace = true

View file

@ -430,6 +430,18 @@ kingfisher scan \
--branch development
```
When `--since-commit` is omitted, specifying `--branch` scans the requested ref directly. This makes it easy to analyze a feature branch without checking it out locally.
```bash
# Scan a branch from an existing checkout
kingfisher scan ~/tmp/repo --branch feature-123
# Or scan a branch when cloning on the fly
kingfisher scan \
--git-url https://github.com/org/repo.git \
--branch origin/feature-123
```
In CI systems that expose the base and head commits explicitly, you can pass those SHAs directly while still using `--git-url`:
```bash

View file

@ -0,0 +1,36 @@
rules:
# Cisco Meraki API key: a 40-character hex string found within 32 characters
# (newlines included) after the case-insensitive keyword "meraki".
- name: Cisco Meraki API Key
id: kingfisher.ciscomeraki.1
pattern: |
(?xi)
meraki
(?:.|[\n\r]){0,32}?
\b
(
[0-9a-f]{40}
)
\b
min_entropy: 3.3
confidence: medium
examples:
- MERAKI_API_KEY=1234567890abcdef1234567890abcdef12345678
- |-
// Meraki configuration
const MERAKI_KEY = "abcdefabcdefabcdefabcdefabcdefabcdefabcd";
references:
- https://developer.cisco.com/meraki/api-v1/overview/
# Validation sends the candidate in the X-Cisco-Meraki-API-Key header to the
# organizations endpoint and requires HTTP 200 with a JSON-valid body.
validation:
type: Http
content:
request:
method: GET
url: https://api.meraki.com/api/v1/organizations
headers:
X-Cisco-Meraki-API-Key: '{{ TOKEN }}'
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200
- type: JsonValid

32
data/rules/duffel.yml Normal file
View file

@ -0,0 +1,32 @@
rules:
# Duffel API token: the literal prefix "duffel_test_" or "duffel_live_" followed
# by exactly 43 characters from [a-z0-9_-=] (case-insensitive). No surrounding
# keyword is required because the prefix is part of the token itself.
- name: Duffel API Token
id: kingfisher.duffel.1
pattern: |
(?xi)
\b
(
duffel_(?:test|live)_[a-z0-9_\-=]{43}
)
\b
min_entropy: 3.2
confidence: medium
examples:
- DUFFEL_TOKEN=duffel_test_qwertyuiopasdfghjklzxcvbnm123456789abcdefgh
- 'Authorization: "Bearer duffel_live_abcd1234efgh5678ijkl9012mnop3456qrstuvwxyza"'
references:
- https://duffel.com/docs/api
# Validation sends the candidate as a Bearer token (with a Duffel-Version
# header) to the airlines endpoint and requires HTTP 200.
validation:
type: Http
content:
request:
method: GET
url: https://api.duffel.com/airlines
headers:
Authorization: 'Bearer {{ TOKEN }}'
Accept: application/json
Duffel-Version: v1
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200

33
data/rules/finnhub.yml Normal file
View file

@ -0,0 +1,33 @@
rules:
# Finnhub API token: a 20-character alphanumeric string found within 24
# characters (newlines included) after the case-insensitive keyword "finnhub".
- name: Finnhub API Token
id: kingfisher.finnhub.1
pattern: |
(?xi)
\b
finnhub
(?:.|[\n\r]){0,24}?
\b
(
[a-z0-9]{20}
)
\b
min_entropy: 3.0
confidence: medium
examples:
- FINNHUB_API_KEY=cd3f1a2b3c4d5e6f7a8b
- '"finnhubToken": "9b8a7c6d5e4f3a2b1c0d"'
references:
- https://finnhub.io/docs/api
# Validation passes the candidate as the `token` query parameter of a company
# profile lookup (symbol MDB) and requires HTTP 200.
validation:
type: Http
content:
request:
method: GET
url: https://finnhub.io/api/v1/stock/profile2?symbol=MDB&token={{ TOKEN }}
headers:
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200

30
data/rules/frameio.yml Normal file
View file

@ -0,0 +1,30 @@
rules:
# Frame.io API token: the literal prefix "fio-u-" followed by exactly 64
# characters from [a-z0-9-_=] (case-insensitive). The pattern has no trailing
# word boundary, so the match simply ends after the 64th character.
- name: Frame.io API Token
id: kingfisher.frameio.1
pattern: |
(?xi)
\b
(
fio-u-[a-z0-9\-_=]{64}
)
min_entropy: 3.3
confidence: medium
examples:
- FRAMEIO_TOKEN=fio-u-a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6q7r8s9t0u1v2w3x4y5z6a7b8c9d0e1f2
- '"Authorization": "Bearer fio-u-b1c2d3e4f5g6h7i8j9k0l1m2n3o4p5q6r7s8t9u0v1w2x3y4z5a6b7c8d9e0f123"'
references:
- https://developer.frame.io/docs/api/authentication
# Validation sends the candidate as a Bearer token to the v2 "me" endpoint and
# requires HTTP 200.
validation:
type: Http
content:
request:
method: GET
url: https://api.frame.io/v2/me
headers:
Authorization: 'Bearer {{ TOKEN }}'
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200

34
data/rules/freshbooks.yml Normal file
View file

@ -0,0 +1,34 @@
rules:
# FreshBooks access token: a 64-character alphanumeric string found within 32
# characters (newlines included) after the case-insensitive keyword
# "freshbooks".
- name: FreshBooks Access Token
id: kingfisher.freshbooks.1
pattern: |
(?xi)
\b
freshbooks
(?:.|[\n\r]){0,32}?
\b
(
[a-z0-9]{64}
)
\b
min_entropy: 3.5
confidence: medium
examples:
- FRESHBOOKS_TOKEN=0f1e2d3c4b5a69788776655443322110ffeeddccbbaa00998877665544332211
- '"freshbooksAccess": "abcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcd"'
references:
- https://www.freshbooks.com/api/authentication
# Validation sends the candidate as a Bearer token to the users/me endpoint
# and requires HTTP 200.
validation:
type: Http
content:
request:
method: GET
url: https://api.freshbooks.com/auth/api/v1/users/me
headers:
Authorization: 'Bearer {{ TOKEN }}'
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200

34
data/rules/gitter.yml Normal file
View file

@ -0,0 +1,34 @@
rules:
# Gitter access token: a 40-character string from [a-z0-9_-] found within 24
# characters (newlines included) after the case-insensitive keyword "gitter".
- name: Gitter Access Token
id: kingfisher.gitter.1
pattern: |
(?xi)
\b
gitter
(?:.|[\n\r]){0,24}?
\b
(
[a-z0-9_-]{40}
)
\b
min_entropy: 3.2
confidence: medium
examples:
- GITTER_TOKEN=abcd1234efgh5678ijkl9012mnop3456qrst7890
- '"gitterToken": "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6q7r8s9t0"'
references:
- https://developer.gitter.im/docs/authentication
# Validation sends the candidate as a Bearer token to the v1 user endpoint and
# requires HTTP 200.
validation:
type: Http
content:
request:
method: GET
url: https://api.gitter.im/v1/user
headers:
Authorization: 'Bearer {{ TOKEN }}'
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200

33
data/rules/infracost.yml Normal file
View file

@ -0,0 +1,33 @@
rules:
# Infracost API token: the literal prefix "ico-" followed by exactly 32
# alphanumeric characters (case-insensitive), bounded by word boundaries.
- name: Infracost API Token
id: kingfisher.infracost.1
pattern: |
(?xi)
\b
(
ico-[a-z0-9]{32}
)
\b
min_entropy: 3.3
confidence: medium
examples:
- export INFRACOST_API_KEY=ico-abcdefabcdefabcdefabcdefabcdefab
- '"infracost": "ico-1234567890abcdef1234567890abcdef"'
references:
- https://www.infracost.io/docs/api_reference/
# Validation POSTs a minimal GraphQL query to the pricing API with the
# candidate in the X-Api-Key header and requires HTTP 200.
validation:
type: Http
content:
request:
method: POST
url: https://pricing.api.infracost.io/graphql
headers:
X-Api-Key: '{{ TOKEN }}'
Content-Type: application/json
Accept: application/json
body: '{"query":"{ ping }"}'
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200

View file

@ -0,0 +1,32 @@
rules:
# LaunchDarkly access token: a 40-character string from [a-z0-9_-=] found
# within 32 characters (newlines included) after the case-insensitive keyword
# "launchdarkly". The keyword has no leading word boundary and the capture has
# no trailing one.
- name: LaunchDarkly Access Token
id: kingfisher.launchdarkly.1
pattern: |
(?xi)
launchdarkly
(?:.|[\n\r]){0,32}?
\b
(
[a-z0-9_\-=]{40}
)
min_entropy: 3.2
confidence: medium
examples:
- LAUNCHDARKLY_TOKEN=api-123abc456def789ghi012jkl345mno678pqr
- '"launchdarkly": "ld-abcdefghijklmno1234567890pqrstuvwxzab"'
references:
- https://docs.launchdarkly.com/sdk/api/
# Validation sends the raw candidate (no scheme prefix) in the Authorization
# header to the members endpoint and requires HTTP 200.
validation:
type: Http
content:
request:
method: GET
url: https://app.launchdarkly.com/api/v2/members
headers:
Authorization: '{{ TOKEN }}'
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200

65
data/rules/lob.yml Normal file
View file

@ -0,0 +1,65 @@
rules:
# Lob API key: "live_" or "test_" followed by 35 hex characters, found within
# 24 characters (newlines included) after the case-insensitive keyword "lob".
# The keyword has no leading word boundary, so it also matches inside longer
# words that contain "lob".
- name: Lob API Key
id: kingfisher.lob.1
pattern: |
(?xi)
lob
(?:.|[\n\r]){0,24}?
\b
(
(?:live|test)_[a-f0-9]{35}
)
\b
min_entropy: 3.5
confidence: medium
examples:
- export LOB_API_KEY=live_9f8e7d6c5b4a3210fedcba09876543210ab
- LOB_KEY="test_abcdefabcdefabcdefabcdefabcdefabcde"
references:
- https://docs.lob.com/#section/Authentication
# Validation uses HTTP Basic auth: the candidate plus a trailing ":" is
# base64-encoded (i.e. the key is the username with an empty password), and
# the addresses listing must return HTTP 200.
validation:
type: Http
content:
request:
method: GET
url: https://api.lob.com/v1/addresses?limit=1
headers:
Authorization: "Basic {{ TOKEN | append: ':' | b64enc }}"
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200
# Lob publishable key: "test_pub_" or "live_pub_" followed by 31 hex
# characters, using the same "lob" keyword window as the rule above.
- name: Lob Publishable API Key
id: kingfisher.lob.2
pattern: |
(?xi)
lob
(?:.|[\n\r]){0,24}?
\b
(
(?:test|live)_pub_[a-f0-9]{31}
)
\b
min_entropy: 3.0
confidence: medium
examples:
- const LOB_PUB_KEY = "test_pub_abcdefabcdefabcdefabcdefabcdefa";
- LOB_PUBLISHABLE="live_pub_1234567890abcdef1234567890abcde"
references:
- https://docs.lob.com/#section/Authentication
# Validation is identical to the secret-key rule: Basic auth against the
# addresses listing, requiring HTTP 200.
# NOTE(review): confirm that publishable keys are accepted by this endpoint.
validation:
type: Http
content:
request:
method: GET
url: https://api.lob.com/v1/addresses?limit=1
headers:
Authorization: "Basic {{ TOKEN | append: ':' | b64enc }}"
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200

30
data/rules/maxmind.yml Normal file
View file

@ -0,0 +1,30 @@
rules:
# MaxMind license key: 6 alphanumeric characters, "_", 29 alphanumeric
# characters, then the literal suffix "_mmk" (case-insensitive).
- name: MaxMind License Key
id: kingfisher.maxmind.1
pattern: |
(?xi)
\b
(
[a-z0-9]{6}_[a-z0-9]{29}_mmk
)
\b
min_entropy: 3.8
confidence: medium
examples:
- MAXMIND_LICENSE=AB12CD_1234567890abcdef1234567890abc_mmk
- license_key="ZXCVBN_0987654321abcdef1234567890abc_mmk"
references:
- https://dev.maxmind.com/geoip/docs/web-services
# Validation passes the candidate as the `license_key` query parameter of a
# city lookup for the caller's own address and requires HTTP 200.
# NOTE(review): confirm the service accepts a license key without an account ID.
validation:
type: Http
content:
request:
method: GET
url: https://geoip.maxmind.com/geoip/v2.1/city/me?license_key={{ TOKEN }}
headers:
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200

View file

@ -0,0 +1,33 @@
rules:
# MessageBird API token: a 25-character alphanumeric string found within 32
# characters (newlines included) after "messagebird", "message_bird" or
# "message-bird" (case-insensitive). There is no word boundary immediately
# before the capture group, only after it.
- name: MessageBird API Token
id: kingfisher.messagebird.1
pattern: |
(?xi)
\b
message[_-]?bird
(?:.|[\n\r]){0,32}?
(
[a-z0-9]{25}
)
\b
min_entropy: 3.4
confidence: medium
examples:
- MESSAGEBIRD_API_KEY=abcdefghijklmnopqrstuvwxy
- "messagebird_token: 'abcde12345fghij67890klmno'"
references:
- https://developers.messagebird.com/api/#authentication
# Validation sends the candidate with the "AccessKey" authorization scheme to
# the balance endpoint and requires HTTP 200 with a JSON-valid body.
validation:
type: Http
content:
request:
method: GET
url: https://rest.messagebird.com/balance
headers:
Authorization: 'AccessKey {{ TOKEN }}'
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: JsonValid

32
data/rules/nytimes.yml Normal file
View file

@ -0,0 +1,32 @@
rules:
# New York Times API key: a 32-character string from [a-z0-9_-=] found within
# 32 characters (newlines included) after "nytimes" or "new york times" (the
# words may be joined directly, by "-" or by a space; case-insensitive).
- name: New York Times API Key
id: kingfisher.nytimes.1
pattern: |
(?xi)
(?:nytimes|new[- ]?york[- ]?times)
(?:.|[\n\r]){0,32}?
\b
(
[a-z0-9_\-=]{32}
)
\b
min_entropy: 3.2
confidence: medium
examples:
- NYTIMES_API_KEY=abcd1234efgh5678ijkl9012mnop3456
- '"new-york-times": "zyxw9876vuts5432rqpo1098nmlk7654"'
references:
- https://developer.nytimes.com/
# Validation passes the candidate as the `api-key` query parameter of the
# top-stories endpoint and requires HTTP 200.
validation:
type: Http
content:
request:
method: GET
url: https://api.nytimes.com/svc/topstories/v2/home.json?api-key={{ TOKEN }}
headers:
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200

31
data/rules/prefect.yml Normal file
View file

@ -0,0 +1,31 @@
rules:
# Prefect API token: the literal prefix "pnu_" followed by exactly 36
# alphanumeric characters (case-insensitive), bounded by word boundaries.
- name: Prefect API Token
id: kingfisher.prefect.1
pattern: |
(?xi)
\b
(
pnu_[a-z0-9]{36}
)
\b
min_entropy: 3.0
confidence: medium
examples:
- PREFECT_API_TOKEN=pnu_1234567890abcdef1234567890abcdef1234
- '"prefectToken": "pnu_abcdefabcdefabcdefabcdefabcdefabcdef"'
references:
- https://docs.prefect.io/latest/concepts/api_keys/
# Validation sends the candidate as a Bearer token to the Prefect Cloud "me"
# endpoint and requires HTTP 200.
validation:
type: Http
content:
request:
method: GET
url: https://api.prefect.cloud/api/me
headers:
Authorization: 'Bearer {{ TOKEN }}'
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200

31
data/rules/scalingo.yml Normal file
View file

@ -0,0 +1,31 @@
rules:
# Scalingo API token: the literal prefix "tk-us-" followed by exactly 48
# characters from [\w-] (word characters or "-"), bounded by word boundaries.
- name: Scalingo API Token
id: kingfisher.scalingo.1
pattern: |
(?xi)
\b
(
tk-us-[\w-]{48}
)
\b
min_entropy: 3.0
confidence: medium
examples:
- SCALINGO_TOKEN=tk-us-abcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdef
- '"scalingo": "tk-us-1234567890abcdef1234567890abcdef1234567890abcdef"'
references:
- https://developers.scalingo.com/apps/api/authentication
# Validation sends the candidate as a Bearer token to the users/self endpoint
# and requires HTTP 200.
validation:
type: Http
content:
request:
method: GET
url: https://api.scalingo.com/v1/users/self
headers:
Authorization: 'Bearer {{ TOKEN }}'
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200

31
data/rules/sendinblue.yml Normal file
View file

@ -0,0 +1,31 @@
rules:
# Sendinblue API token: the literal prefix "xkeysib-", 64 hex characters, "-",
# then 16 alphanumeric characters (case-insensitive).
- name: Sendinblue API Token
id: kingfisher.sendinblue.1
pattern: |
(?xi)
\b
(
xkeysib-[a-f0-9]{64}-[a-z0-9]{16}
)
\b
min_entropy: 3.2
confidence: medium
examples:
- XKEYSIB_TOKEN=xkeysib-abcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcd-1234567890abcd12
- '"sendinblue": "xkeysib-1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef-ab12cd34ef56gh78"'
references:
- https://developers.sendinblue.com/docs/authentication
# Validation sends the candidate in the `api-key` header to the v3 account
# endpoint and requires HTTP 200.
validation:
type: Http
content:
request:
method: GET
url: https://api.sendinblue.com/v3/account
headers:
api-key: '{{ TOKEN }}'
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200

95
data/rules/sentry.yml Normal file
View file

@ -0,0 +1,95 @@
rules:
# Sentry access token (unprefixed form): a 64-character hex string found
# within 32 characters (newlines included) after the case-insensitive keyword
# "sentry".
- name: Sentry Access Token
id: kingfisher.sentry.1
pattern: |
(?xi)
\b
sentry
(?:.|[\n\r]){0,32}?
\b
(
[a-f0-9]{64}
)
\b
min_entropy: 3.5
confidence: medium
examples:
- SENTRY_TOKEN=abcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcd
- '"sentry": "1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef"'
references:
- https://docs.sentry.io/api/auth/
# Validation sends the candidate as a Bearer token to the projects listing and
# requires HTTP 200.
validation:
type: Http
content:
request:
method: GET
url: https://sentry.io/api/0/projects/
headers:
Authorization: 'Bearer {{ TOKEN }}'
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200
# Sentry organization token: "sntrys_" + a base64 payload + "_" + 43 base64
# characters. The payload must start with "eyJpYXQiO" and contain one of three
# fixed base64 fragments.
# NOTE(review): the fragments look like the base64 encodings of a JSON
# `"iat"` key and of `region_url` at the three possible byte alignments;
# confirm against Sentry's token format.
- name: Sentry Organization Token
id: kingfisher.sentry.2
pattern: |
(?xi)
\b
(
sntrys_eyJpYXQiO[a-zA-Z0-9+/]{10,200}(?:LCJyZWdpb25fdXJs|InJlZ2lvbl91cmwi|cmVnaW9uX3VybCI6)[a-zA-Z0-9+/]{10,200}={0,2}_[a-zA-Z0-9+/]{43}
)
min_entropy: 4.2
confidence: medium
examples:
- sntrys_eyJpYXQiOjE2OTA4ODAwMDAsInJlZ2lvbl91cmwiOiJodHRwczovL3NlbnRyeS5pby9vcmdzL215LW9yZy8ifQ==_abcdefghijklmnopqrstuvwx1234567890abcdefabc
- sntrys_eyJpYXQiOiIxNjkwODgwMDAwIiwicmVnaW9uX3VybCI6Imh0dHBzOi8vc2VudHJ5LmlvLyJ9_abcdABCD1234567890abcdABCD1234567890abcdABCD
references:
- https://docs.sentry.io/api/auth/
# Validation is the same Bearer-token request to the projects listing,
# requiring HTTP 200.
validation:
type: Http
content:
request:
method: GET
url: https://sentry.io/api/0/projects/
headers:
Authorization: 'Bearer {{ TOKEN }}'
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200
# Sentry user token: the literal prefix "sntryu_" followed by exactly 64 hex
# characters (case-insensitive), bounded by word boundaries.
- name: Sentry User Token
id: kingfisher.sentry.3
pattern: |
(?xi)
\b
(
sntryu_[a-f0-9]{64}
)
\b
min_entropy: 3.5
confidence: medium
examples:
- sntryu_abcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcd
- SNTRY_USER="sntryu_1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef"
references:
- https://docs.sentry.io/api/auth/
# Validation is the same Bearer-token request to the projects listing,
# requiring HTTP 200.
validation:
type: Http
content:
request:
method: GET
url: https://sentry.io/api/0/projects/
headers:
Authorization: 'Bearer {{ TOKEN }}'
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200

31
data/rules/shippo.yml Normal file
View file

@ -0,0 +1,31 @@
rules:
# Shippo API token: the literal prefix "shippo_live_" or "shippo_test_"
# followed by exactly 40 hex characters (case-insensitive).
- name: Shippo API Token
id: kingfisher.shippo.1
pattern: |
(?xi)
\b
(
shippo_(?:live|test)_[a-f0-9]{40}
)
\b
min_entropy: 3.3
confidence: medium
examples:
- SHIPPO_TOKEN=shippo_test_1234567890abcdef1234567890abcdef12345678
- 'Authorization: "ShippoToken shippo_live_abcdefabcdefabcdefabcdefabcdefabcdefabcd"'
references:
- https://goshippo.com/docs/reference
# Validation sends the candidate with the "ShippoToken" authorization scheme
# to the shipments endpoint and requires HTTP 200.
validation:
type: Http
content:
request:
method: GET
url: https://api.goshippo.com/shipments/
headers:
Authorization: 'ShippoToken {{ TOKEN }}'
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200

33
data/rules/twitch.yml Normal file
View file

@ -0,0 +1,33 @@
rules:
# Twitch token: exactly 30 alphanumeric characters found within 32 characters
# (newlines included) after the case-insensitive keyword "twitch".
# The word boundary before the capture group keeps the rule from capturing the
# 30-character tail of a longer alphanumeric run, which would otherwise be sent
# to validation as a truncated token.
- name: Twitch API Token
id: kingfisher.twitch.1
pattern: |
(?xi)
\b
twitch
(?:.|[\n\r]){0,32}?
\b
(
[a-z0-9]{30}
)
\b
min_entropy: 3.5
confidence: medium
examples:
- TWITCH_TOKEN=abcdefghijklmnopqrstuvwx123456
- "twitch_api_token: '0123456789abcdefghijklmnopqrst'"
references:
- https://dev.twitch.tv/docs/authentication/validate-tokens/
# Validation sends the candidate with the "OAuth" authorization scheme to the
# token-validation endpoint and requires HTTP 200 with a JSON-valid body.
validation:
type: Http
content:
request:
method: GET
url: https://id.twitch.tv/oauth2/validate
headers:
Authorization: 'OAuth {{ TOKEN }}'
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: JsonValid

32
data/rules/typeform.yml Normal file
View file

@ -0,0 +1,32 @@
rules:
# Typeform API token: the literal prefix "tfp_" followed by exactly 59
# characters from [a-z0-9_-=.], found within 32 characters (newlines included)
# after the case-insensitive keyword "typeform". The capture has no trailing
# word boundary.
- name: Typeform API Token
id: kingfisher.typeform.1
pattern: |
(?xi)
\b
typeform
(?:.|[\n\r]){0,32}?
(
tfp_[a-z0-9_\-=\.]{59}
)
min_entropy: 4.0
confidence: medium
examples:
- TYPEFORM_TOKEN=tfp_abcd1234efgh5678ijkl9012mnop3456qrst7890uvwx_yzABCD1234efgh
- "typeform_api_key: 'tfp_qwerty1234567890asdfgh0987654321zxcvbnmPOIU_0987654321lkjhi'"
references:
- https://developer.typeform.com/get-started/personal-access-token/
# Validation sends the candidate as a Bearer token to the forms endpoint and
# requires HTTP 200 with a JSON-valid body.
validation:
type: Http
content:
request:
method: GET
url: https://api.typeform.com/forms
headers:
Authorization: 'Bearer {{ TOKEN }}'
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: JsonValid

View file

@ -327,8 +327,8 @@ pub struct InputSpecifierArgs {
#[arg(long = "since-commit", value_name = "GIT-REF", help_heading = "Git Options")]
pub since_commit: Option<String>,
/// Branch or ref containing changes to scan (defaults to HEAD)
#[arg(long, value_name = "GIT-REF", requires = "since_commit", help_heading = "Git Options")]
/// Branch or ref to scan or compare against (defaults to HEAD)
#[arg(long, value_name = "GIT-REF", help_heading = "Git Options")]
pub branch: Option<String>,
}

View file

@ -60,8 +60,8 @@ use tracing::debug;
/// Pair of git refs that bounds a diff-based scan of a repository.
///
/// NOTE(review): this listing is a diff hunk with its +/- markers stripped, so both
/// the pre-change and the post-change field declarations appear below. The documented
/// fields are the ones the visible callers construct.
#[derive(Clone)]
pub struct GitDiffConfig {
pub since_ref: String,
pub branch_ref: Option<String>,
/// Base ref to diff from (populated from `--since-commit`); `None` when no base was
/// given, in which case the branch tree is diffed with no base tree.
pub since_ref: Option<String>,
/// Ref whose tree is scanned (populated from `--branch`); the visible caller falls
/// back to `"HEAD"` when the flag is absent.
pub branch_ref: String,
}
struct EnumeratorConfig {

View file

@ -60,10 +60,20 @@ pub fn enumerate_filesystem_inputs(
) -> Result<()> {
let repo_scan_timeout = Duration::from_secs(args.git_repo_timeout);
let diff_config = args.input_specifier_args.since_commit.as_ref().map(|since| GitDiffConfig {
since_ref: since.clone(),
branch_ref: args.input_specifier_args.branch.clone(),
});
let diff_config = if args.input_specifier_args.since_commit.is_some()
|| args.input_specifier_args.branch.is_some()
{
Some(GitDiffConfig {
since_ref: args.input_specifier_args.since_commit.clone(),
branch_ref: args
.input_specifier_args
.branch
.clone()
.unwrap_or_else(|| "HEAD".to_string()),
})
} else {
None
};
let progress = if progress_enabled {
let style =
@ -709,101 +719,123 @@ fn enumerate_git_diff_repo(
exclude_globset: Option<std::sync::Arc<globset::GlobSet>>,
collect_commit_metadata: bool,
) -> Result<GitRepoResult> {
let since_ref = diff_cfg.since_ref.clone();
let branch_ref = diff_cfg.branch_ref.clone().unwrap_or_else(|| "HEAD".to_string());
let GitDiffConfig { since_ref, branch_ref } = diff_cfg;
let base_id = resolve_diff_ref(&repository, path, &since_ref).with_context(|| {
format!("Failed to resolve --since-commit '{}' in repository {}", since_ref, path.display())
})?;
let head_id = resolve_diff_ref(&repository, path, &branch_ref).with_context(|| {
format!("Failed to resolve --branch '{}' in repository {}", branch_ref, path.display())
})?;
let blobs = {
let head_id = resolve_diff_ref(&repository, path, &branch_ref).with_context(|| {
format!("Failed to resolve --branch '{}' in repository {}", branch_ref, path.display())
})?;
let base_commit = base_id
.object()
.with_context(|| format!("Failed to load commit {} for diffing", base_id.to_hex()))?
.try_into_commit()
.with_context(|| format!("Referenced object {} is not a commit", base_id.to_hex()))?;
let head_commit = head_id
.object()
.with_context(|| format!("Failed to load commit {} for diffing", head_id.to_hex()))?
.try_into_commit()
.with_context(|| format!("Referenced object {} is not a commit", head_id.to_hex()))?;
let head_commit = head_id
.object()
.with_context(|| format!("Failed to load commit {} for diffing", head_id.to_hex()))?
.try_into_commit()
.with_context(|| format!("Referenced object {} is not a commit", head_id.to_hex()))?;
let base_tree = base_commit
.tree()
.with_context(|| format!("Failed to read tree for commit {}", base_id.to_hex()))?;
let head_tree = head_commit
.tree()
.with_context(|| format!("Failed to read tree for commit {}", head_id.to_hex()))?;
let head_tree = head_commit
.tree()
.with_context(|| format!("Failed to read tree for commit {}", head_id.to_hex()))?;
let changes =
repository.diff_tree_to_tree(Some(&base_tree), Some(&head_tree), None).with_context(
|| format!("Failed to compute diff between '{}' and '{}'", since_ref, branch_ref),
)?;
let mut base_tree = None;
// Release tree handles before returning the repository to avoid borrow check conflicts.
drop(base_tree);
drop(head_tree);
if let Some(ref since_ref_value) = since_ref {
let base_id =
resolve_diff_ref(&repository, path, since_ref_value).with_context(|| {
format!(
"Failed to resolve --since-commit '{}' in repository {}",
since_ref_value,
path.display()
)
})?;
let commit_metadata = if collect_commit_metadata {
let committer = head_commit
.committer()
.with_context(|| format!("Failed to read committer for {}", branch_ref))?
.trim();
let timestamp = committer.time().unwrap_or_else(|_| gix::date::Time::new(0, 0));
Arc::new(CommitMetadata {
commit_id: head_commit.id,
committer_name: committer.name.to_str_lossy().into_owned(),
committer_email: committer.email.to_str_lossy().into_owned(),
committer_timestamp: timestamp,
})
} else {
Arc::new(CommitMetadata {
commit_id: head_commit.id,
committer_name: String::new(),
committer_email: String::new(),
committer_timestamp: gix::date::Time::new(0, 0),
})
};
let commit = base_id
.object()
.with_context(|| format!("Failed to load commit {} for diffing", base_id.to_hex()))?
.try_into_commit()
.with_context(|| {
format!("Referenced object {} is not a commit", base_id.to_hex())
})?;
let tree = commit
.tree()
.with_context(|| format!("Failed to read tree for commit {}", base_id.to_hex()))?;
let mut blobs = Vec::new();
for change in changes {
let (entry_mode, id, location) = match change {
ChangeDetached::Addition { entry_mode, id, location, .. } => (entry_mode, id, location),
ChangeDetached::Modification { entry_mode, id, location, .. } => {
(entry_mode, id, location)
}
ChangeDetached::Rewrite { entry_mode, id, location, .. } => (entry_mode, id, location),
ChangeDetached::Deletion { .. } => continue,
base_tree = Some(tree);
}
let changes = repository
.diff_tree_to_tree(base_tree.as_ref(), Some(&head_tree), None)
.with_context(|| {
if let Some(ref since_ref_value) = since_ref {
format!(
"Failed to compute diff between '{}' and '{}'",
since_ref_value, branch_ref
)
} else {
format!("Failed to compute tree for '{}'", branch_ref)
}
})?;
let commit_metadata = if collect_commit_metadata {
let committer = head_commit
.committer()
.with_context(|| format!("Failed to read committer for {}", branch_ref))?
.trim();
let timestamp = committer.time().unwrap_or_else(|_| gix::date::Time::new(0, 0));
Arc::new(CommitMetadata {
commit_id: head_commit.id,
committer_name: committer.name.to_str_lossy().into_owned(),
committer_email: committer.email.to_str_lossy().into_owned(),
committer_timestamp: timestamp,
})
} else {
Arc::new(CommitMetadata {
commit_id: head_commit.id,
committer_name: String::new(),
committer_email: String::new(),
committer_timestamp: gix::date::Time::new(0, 0),
})
};
match entry_mode.kind() {
EntryKind::Blob | EntryKind::BlobExecutable | EntryKind::Link => {}
_ => continue,
}
let mut blobs = Vec::new();
for change in changes {
let (entry_mode, id, location) = match change {
ChangeDetached::Addition { entry_mode, id, location, .. } => {
(entry_mode, id, location)
}
ChangeDetached::Modification { entry_mode, id, location, .. } => {
(entry_mode, id, location)
}
ChangeDetached::Rewrite { entry_mode, id, location, .. } => {
(entry_mode, id, location)
}
ChangeDetached::Deletion { .. } => continue,
};
let relative_path_str = String::from_utf8_lossy(location.as_ref()).into_owned();
let relative_path = Path::new(&relative_path_str);
if let Some(gs) = &exclude_globset {
if gs.is_match(relative_path) || gs.is_match(&path.join(relative_path)) {
debug!(
"Skipping {} due to --exclude while diffing {}",
relative_path.display(),
path.display()
);
continue;
match entry_mode.kind() {
EntryKind::Blob | EntryKind::BlobExecutable | EntryKind::Link => {}
_ => continue,
}
let relative_path_str = String::from_utf8_lossy(location.as_ref()).into_owned();
let relative_path = Path::new(&relative_path_str);
if let Some(gs) = &exclude_globset {
if gs.is_match(relative_path) || gs.is_match(&path.join(relative_path)) {
debug!(
"Skipping {} due to --exclude while diffing {}",
relative_path.display(),
path.display()
);
continue;
}
}
let appearance =
BlobAppearance { commit_metadata: Arc::clone(&commit_metadata), path: location };
blobs.push(GitBlobMetadata { blob_oid: id, first_seen: smallvec![appearance] });
}
let appearance =
BlobAppearance { commit_metadata: Arc::clone(&commit_metadata), path: location };
blobs.push(GitBlobMetadata { blob_oid: id, first_seen: smallvec![appearance] });
}
// Release commit handles before returning the repository to avoid borrow check conflicts.
drop(base_commit);
drop(head_commit);
blobs
};
Ok(GitRepoResult { repository, path: path.to_owned(), blobs })
}
@ -889,6 +921,16 @@ fn reference_candidates(reference: &str) -> Vec<String> {
#[cfg(test)]
mod tests {
use std::fs;
use std::path::Path;
use super::{enumerate_git_diff_repo, GitDiffConfig};
use anyhow::Result;
use bstr::ByteSlice;
use git2::{Repository as Git2Repository, Signature};
use gix::{open::Options, open_opts};
use tempfile::tempdir;
use super::reference_candidates;
#[test]
@ -941,6 +983,44 @@ mod tests {
fn reference_candidates_for_head_symbol() {
    // The symbolic name "HEAD" is expected to yield itself as the only candidate.
    let expected = vec![String::from("HEAD")];
    assert_eq!(reference_candidates("HEAD"), expected);
}
#[test]
fn enumerate_git_diff_repo_branch_without_since_scans_head_tree() -> Result<()> {
    // Build a throwaway repository holding a single committed file.
    let workdir = tempdir()?;
    let repo_root = workdir.path().join("repo");
    let git2_repo = Git2Repository::init(&repo_root)?;
    let author = Signature::now("tester", "tester@example.com")?;

    let secret_file = repo_root.join("secret.txt");
    fs::create_dir_all(secret_file.parent().unwrap())?;
    fs::write(&secret_file, b"super-secret")?;

    // Stage the file and turn the index into a tree for the initial commit.
    let mut staging = git2_repo.index()?;
    staging.add_path(Path::new("secret.txt"))?;
    let tree_oid = staging.write_tree()?;
    let root_tree = git2_repo.find_tree(tree_oid)?;
    let initial_oid =
        git2_repo.commit(Some("HEAD"), &author, &author, "initial", &root_tree, &[])?;

    // Point a separate branch at that commit; it is the ref handed to the enumerator.
    let initial_commit = git2_repo.find_commit(initial_oid)?;
    git2_repo.branch("featurefake", &initial_commit, true)?;

    // Re-open the same repository through gix, which is what the enumerator consumes.
    let dot_git = repo_root.join(".git");
    let gix_handle = open_opts(&dot_git, Options::isolated().open_path_as_is(true))?;

    // No `since_ref`: only the branch is supplied.
    let branch_only = GitDiffConfig { since_ref: None, branch_ref: "featurefake".to_string() };
    let outcome = enumerate_git_diff_repo(&repo_root, gix_handle, branch_only, None, false)?;

    assert_eq!(outcome.blobs.len(), 1, "expected the full branch tree to be enumerated");
    let only_blob = &outcome.blobs[0];
    assert_eq!(only_blob.first_seen.len(), 1);
    assert_eq!(only_blob.first_seen[0].path.to_str_lossy(), "secret.txt");
    Ok(())
}
}
/// A simple enum describing how we yield file content: