diff --git a/docs-site/overrides/main.html b/docs-site/overrides/main.html
index 2804a17..3d2331f 100644
--- a/docs-site/overrides/main.html
+++ b/docs-site/overrides/main.html
@@ -7,7 +7,7 @@
"@context": "https://schema.org",
"@type": "SoftwareApplication",
"name": "Kingfisher",
- "description": "Open source secret scanner with live validation. 945 detection rules, blast radius mapping, and credential revocation.",
+ "description": "Open source secret scanner with live validation. 950 detection rules, blast radius mapping, and credential revocation.",
"applicationCategory": "DeveloperApplication",
"operatingSystem": "Linux, macOS, Windows",
"license": "https://opensource.org/licenses/Apache-2.0",
diff --git a/docs-site/scripts/prepare-docs.py b/docs-site/scripts/prepare-docs.py
index 37fca39..8dea79e 100644
--- a/docs-site/scripts/prepare-docs.py
+++ b/docs-site/scripts/prepare-docs.py
@@ -108,6 +108,7 @@ LINK_REWRITES = {
"ADVANCED.md": "../usage/advanced.md",
"BASELINE.md": "../usage/baseline.md",
"DEPLOYMENT.md": "../usage/deployment.md",
+ "CONFIG.md": "../usage/configuration.md",
"ACCESS_MAP.md": "../features/access-map.md",
"REVOCATION_PROVIDERS.md": "../features/revocation.md",
"TOKEN_REVOCATION_SUPPORT.md": "../features/revocation.md",
diff --git a/docs/CONFIG.md b/docs/CONFIG.md
index c9ed36d..7c2ab14 100644
--- a/docs/CONFIG.md
+++ b/docs/CONFIG.md
@@ -7,11 +7,20 @@ concatenated); scalars are **default-only** — a config value applies only when
the user did not pass the matching `--flag`. This keeps CI overrides
predictable and makes the CLI authoritative.
-## Discovery
+## Loading a config
-- `--config FILE` overrides everything; an explicit path that fails to parse is fatal.
-- Otherwise Kingfisher walks up from the current working directory looking for
- `kingfisher.yaml`. Missing config is silent.
+Kingfisher does **not** auto-discover `kingfisher.yaml`. The file is loaded
+only when you pass `--config FILE` explicitly:
+
+```bash
+kingfisher scan . --config ./kingfisher.yaml
+```
+
+A missing or malformed file is a fatal error — there is no silent fallback,
+so a typo in the path or a broken YAML block fails fast instead of running
+with surprising defaults. Auto-discovery was rejected because it makes scan
+results depend on where the binary was launched from, which is too easy to
+get wrong in CI.
## Precedence
@@ -25,14 +34,16 @@ but redundant. The one nuance: `rules.enabled` *replaces* the synthetic
`["all"]` default when you don't pass `--rule`, so a config that lists
`["custom"]` actually narrows the selection.
-## Generating a config from an existing CLI invocation
+## End-to-end: create a config and scan with it
-Don't write the YAML by hand. If you already have a long
-`kingfisher scan ...` command (or a CI step assembling flags), run the same
-flags under `kingfisher config init` and capture the YAML:
+### Step 1 — generate the config
+
+Don't write the YAML by hand. Take your existing `kingfisher scan ...`
+command (or the CI step that builds it) and run the same flags under
+`kingfisher config init`:
```bash
-# Print to stdout, redirect to file
+# Print to stdout, redirect to file:
kingfisher config init \
--confidence high \
--redact \
@@ -45,9 +56,8 @@ kingfisher config init \
--tls-mode lax \
> kingfisher.yaml
-# Or write directly:
+# Or write the file directly (pass --force to overwrite):
kingfisher config init [...flags...] --out kingfisher.yaml
-# Pass --force to overwrite an existing file.
```
Only flags you actually supply appear in the output; clap defaults are
@@ -55,6 +65,20 @@ omitted to keep the file minimal. Scan-target inputs (paths, `--git-url`,
GitHub/GitLab/etc. flags, S3/GCS buckets) are stripped — they describe
*what* this run scans and don't belong in shared project policy.
+### Step 2 — run the scan, passing the config explicitly
+
+```bash
+kingfisher scan . --config ./kingfisher.yaml
+```
+
+Without `--config FILE` the config is not loaded — there is no auto-discovery.
+CLI flags can still override any individual value for a single run:
+
+```bash
+kingfisher scan . --config ./kingfisher.yaml --confidence low
+# scan.confidence: high in YAML → CLI flag wins, runs at low confidence
+```
+
## Webhook URL policy
`alerts.webhooks[].url` (and `--alert-webhook URL`) **must use `https://`**.
@@ -184,6 +208,8 @@ git:
keep_clones: false # bool (--keep-clones)
repo_clone_limit: null # int (--repo-clone-limit)
include_contributors: false # bool (--include-contributors)
+ github_api_url: null # URL GHE / self-hosted GH (--github-api-url)
+ gitlab_api_url: null # URL self-hosted GitLab (--gitlab-api-url)
```
Unknown fields are rejected (typo protection). Empty sections and a missing
@@ -191,15 +217,20 @@ top-level file are both fine.
## Example: CI workflow
-```yaml
-# .github/workflows/secrets.yml
-- uses: mongodb/kingfisher/.github/actions/kingfisher@main
- with:
- config: ./kingfisher.yaml
- alert-webhook: ${{ secrets.SLACK_SECURITY_WEBHOOK }}
+A typical `kingfisher.yaml` for a CI repo, paired with a workflow step
+that runs `kingfisher scan` against it:
+
+```bash
+# .github/workflows/secrets.yml — run step
+kingfisher scan . \
+ --config ./kingfisher.yaml \
+ --alert-webhook "$SLACK_SECURITY_WEBHOOK"
+# `--alert-webhook` here is appended to any webhooks already in
+# kingfisher.yaml (lists are additive). Everything else comes from the
+# config file.
```
-A typical `kingfisher.yaml` for a CI repo:
+The committed `kingfisher.yaml`:
```yaml
scan:
diff --git a/docs/USAGE.md b/docs/USAGE.md
index 915fe49..74f6ed4 100644
--- a/docs/USAGE.md
+++ b/docs/USAGE.md
@@ -420,11 +420,31 @@ kingfisher scan ./my-project \
### Project configuration file (`kingfisher.yaml`)
Most `kingfisher scan` flags can be set as project defaults via a
-`kingfisher.yaml` file in the repo root (or any ancestor directory). CLI
-flags always win; config values fill in defaults. Lists are concatenated.
+`kingfisher.yaml` file. CLI flags always win; config values fill in
+defaults. Lists are concatenated.
+
+The config file is **never auto-discovered** — pass `--config FILE`
+explicitly or it is not loaded.
+
+**Step 1 — generate the config from your existing CLI command** (don't
+write the YAML by hand):
+
+```bash
+kingfisher config init \
+ --confidence high \
+ --redact \
+ --exclude vendor/ \
+ --exclude '**/node_modules/**' \
+ --format sarif \
+ --output ./kingfisher.sarif \
+ --alert-webhook https://hooks.slack.com/services/T0/B0/AAA \
+ > kingfisher.yaml
+```
+
+The resulting `kingfisher.yaml`:
```yaml
-# kingfisher.yaml
+# kingfisher.yaml — generated by `kingfisher config init`.
scan:
confidence: high
redact: true
@@ -438,21 +458,19 @@ filters:
alerts:
webhooks:
- url: https://hooks.slack.com/services/T0/B0/AAA
- format: slack
```
+**Step 2 — run the scan, passing the config explicitly:**
+
```bash
-kingfisher scan . # auto-discovers ./kingfisher.yaml
-kingfisher scan . --config /etc/kf.yaml # explicit path
+kingfisher scan . --config ./kingfisher.yaml
```
-Don't write the YAML by hand. If you already have a long `kingfisher scan`
-command, run the same flags under `kingfisher config init` to generate it:
+You can override any config value on the CLI for a single run:
```bash
-kingfisher config init \
- --confidence high --redact --exclude vendor/ --format sarif \
- > kingfisher.yaml
+kingfisher scan . --config ./kingfisher.yaml --confidence low
+# scan.confidence: high in YAML → CLI flag wins, runs at low confidence
```
See [`docs/CONFIG.md`](CONFIG.md) for the full schema and precedence rules.
@@ -723,6 +741,60 @@ kingfisher scan https://github.com/org/repo.git --repo-artifacts
KF_GITHUB_TOKEN="ghp_…" kingfisher scan https://github.com/org/private_repo.git --repo-artifacts
```
+### Scan a GitHub Enterprise / self-hosted GitHub instance
+
+For GitHub Enterprise Server (GHES) or any self-hosted GitHub install, you
+need two flags:
+
+- `--github-api-url <URL>` — points the **enumeration / clone** flow at the
+ custom API root (typically `https://ghe.example.com/api/v3/`).
+- `--endpoint github=<URL>` — points the **token validation / revocation**
+ flow at the same instance, so any GitHub PATs Kingfisher discovers in the
+ scanned source are checked against your GHE rather than `api.github.com`.
+
+```bash
+# 1. Scan every org repo on GHE and validate discovered tokens against the same instance
+KF_GITHUB_TOKEN="ghp_…" kingfisher scan github \
+ --organization my-org \
+ --github-api-url https://ghe.corp.example.com/api/v3/ \
+ --endpoint github=https://ghe.corp.example.com
+
+# 2. Scan a single GHE repo by URL (positional target)
+KF_GITHUB_TOKEN="ghp_…" kingfisher scan https://ghe.corp.example.com/org/repo.git \
+ --endpoint github=https://ghe.corp.example.com
+
+# 3. Scan ALL orgs on a GHE instance (requires non-default --github-api-url)
+KF_GITHUB_TOKEN="ghp_…" kingfisher scan github \
+ --all-orgs \
+ --github-api-url https://ghe.corp.example.com/api/v3/ \
+ --endpoint github=https://ghe.corp.example.com
+
+# 4. GHE on a private network — add --allow-internal-ips so the validator
+# can reach RFC1918 / loopback hosts (SSRF guard is on by default).
+KF_GITHUB_TOKEN="ghp_…" kingfisher scan github \
+ --organization my-org \
+ --github-api-url https://ghe.internal/api/v3/ \
+ --endpoint github=https://ghe.internal \
+ --allow-internal-ips
+
+# 5. Validate a single PAT against GHE without scanning anything
+kingfisher validate --rule github \
+ --endpoint github=https://ghe.corp.example.com \
+ "ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+
+# 6. Revoke (delete) a confirmed-leaked PAT against GHE
+kingfisher revoke --rule github \
+ --endpoint github=https://ghe.corp.example.com \
+ "ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+```
+
+> **Why two URLs?** `--github-api-url` is the GHE *API* root that
+> Kingfisher queries to enumerate orgs, repos, and contributors for cloning.
+> `--endpoint github=…` is the *validator* root used to live-check
+> discovered tokens. They are usually the same host, but they're separate
+> flags because some deployments route the two differently (for example,
+> an SSO portal in front of repo access vs. a direct API endpoint for token validation).
+
---
## GitLab
@@ -783,6 +855,99 @@ kingfisher scan https://gitlab.com/group/project.git --repo-artifacts
KF_GITLAB_TOKEN="glpat-…" kingfisher scan https://gitlab.com/group/private_project.git --repo-artifacts
```
+### Scan a self-hosted (Omnibus / Cloud Native) GitLab instance
+
+For GitLab self-hosted (Omnibus, Helm, or Cloud Native), pair the
+enumeration flag with a matching validation endpoint, just like with GHE:
+
+- `--gitlab-api-url <URL>` — points the **enumeration / clone** flow at
+ the custom GitLab root (typically `https://gitlab.example.com/`).
+- `--endpoint gitlab=<URL>` — points the **token validation / revocation**
+ flow at the same instance, so any GitLab PATs found in the scanned
+ source are checked against your self-hosted GitLab rather than
+ `gitlab.com`.
+
+```bash
+# 1. Scan a self-hosted group and validate discovered tokens against the same instance
+KF_GITLAB_TOKEN="glpat-…" kingfisher scan gitlab \
+ --group my-group \
+ --include-subgroups \
+ --gitlab-api-url https://gitlab.corp.example.com/ \
+ --endpoint gitlab=https://gitlab.corp.example.com
+
+# 2. Scan a single self-hosted GitLab project by URL
+KF_GITLAB_TOKEN="glpat-…" kingfisher scan https://gitlab.corp.example.com/group/project.git \
+ --endpoint gitlab=https://gitlab.corp.example.com
+
+# 3. Scan ALL groups on a self-hosted GitLab (requires non-default --gitlab-api-url)
+KF_GITLAB_TOKEN="glpat-…" kingfisher scan gitlab \
+ --all-groups \
+ --gitlab-api-url https://gitlab.corp.example.com/ \
+ --endpoint gitlab=https://gitlab.corp.example.com
+
+# 4. Self-hosted GitLab on a private network — add --allow-internal-ips so
+# the validator can reach RFC1918 / loopback hosts.
+KF_GITLAB_TOKEN="glpat-…" kingfisher scan gitlab \
+ --group my-group \
+ --gitlab-api-url https://gitlab.internal/ \
+ --endpoint gitlab=https://gitlab.internal \
+ --allow-internal-ips
+
+# 5. Validate a single PAT against self-hosted GitLab without scanning anything
+kingfisher validate --rule gitlab \
+ --endpoint gitlab=https://gitlab.corp.example.com \
+ "glpat-xxxxxxxxxxxxxxxxxxxx"
+
+# 6. Revoke (delete) a confirmed-leaked PAT against self-hosted GitLab
+kingfisher revoke --rule gitlab \
+ --endpoint gitlab=https://gitlab.corp.example.com \
+ "glpat-xxxxxxxxxxxxxxxxxxxx"
+```
+
+### Many endpoints at once: `--endpoint-config`
+
+If you maintain a fleet of self-hosted instances (GHE, self-hosted GitLab,
+Gitea, Jira DC, Confluence, Artifactory), put them in a single YAML file
+and reference it instead of repeating `--endpoint` on every command:
+
+```yaml
+# kingfisher-endpoints.yml
+endpoints:
+ github: https://ghe.corp.example.com
+ gitlab: https://gitlab.corp.example.com
+ gitea: https://gitea.corp.example.com
+ jira: https://jira.corp.example.com
+ confluence: https://wiki.corp.example.com
+ artifactory: http://artifactory.internal:8081
+```
+
+```bash
+KF_GITHUB_TOKEN="ghp_…" KF_GITLAB_TOKEN="glpat-…" kingfisher scan github \
+ --organization my-org \
+ --github-api-url https://ghe.corp.example.com/api/v3/ \
+ --endpoint-config ./kingfisher-endpoints.yml \
+ --allow-internal-ips
+```
+
+### Tip: bake the endpoints into `kingfisher.yaml`
+
+Once you've worked out the right flags, capture them as project defaults
+so every scan uses the same config:
+
+```bash
+kingfisher config init \
+ --github-api-url https://ghe.corp.example.com/api/v3/ \
+ --gitlab-api-url https://gitlab.corp.example.com/ \
+ --endpoint github=https://ghe.corp.example.com \
+ --endpoint gitlab=https://gitlab.corp.example.com \
+ --allow-internal-ips \
+ > kingfisher.yaml
+
+# Then every scan inherits the same self-hosted defaults:
+KF_GITHUB_TOKEN="ghp_…" kingfisher scan github --organization my-org \
+ --config ./kingfisher.yaml
+```
+
### List GitLab repositories
```bash
diff --git a/src/cli/config.rs b/src/cli/config.rs
index 7174cd6..7e8f1e8 100644
--- a/src/cli/config.rs
+++ b/src/cli/config.rs
@@ -61,7 +61,9 @@ use crate::cli::commands::output::ReportOutputFormat;
use crate::cli::commands::scan::ConfidenceLevel;
use crate::cli::global::TlsMode;
-/// File name auto-discovered when the user does not pass `--config`.
+/// Conventional file name when users save a project-local config. The path
+/// must still be passed explicitly via `--config`; nothing in the binary
+/// auto-loads a file with this name.
pub const DEFAULT_CONFIG_NAME: &str = "kingfisher.yaml";
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
@@ -252,7 +254,7 @@ pub struct GlobalConfig {
}
// ----------------------------------------------------------------------------
-// git: clone behavior for git scans.
+// git: clone behavior + provider API roots for git scans.
// ----------------------------------------------------------------------------
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
@@ -261,6 +263,17 @@ pub struct GitConfig {
pub keep_clones: Option,
pub repo_clone_limit: Option,
pub include_contributors: Option,
+ /// GitHub Enterprise / self-hosted GitHub API root used during enumeration
+ /// and cloning. Equivalent to `--github-api-url` on the bare `scan` form
+ /// or `--api-url` on `kingfisher scan github`. For *validation* of
+ /// discovered tokens against the same instance, set
+ /// `global.endpoints` (e.g. `github=https://ghe.example.com`).
+ pub github_api_url: Option,
+ /// Self-hosted GitLab API root used during enumeration and cloning.
+ /// Equivalent to `--gitlab-api-url`. Pair with a matching
+ /// `global.endpoints` `gitlab=...` entry to also redirect token
+ /// validation to the same instance.
+ pub gitlab_api_url: Option,
}
// ----------------------------------------------------------------------------
@@ -364,10 +377,6 @@ impl From for ConfigReportFormat {
}
}
-/// Cap on `discover_path` upward walks. Avoids unbounded directory traversal
-/// on networked filesystems or pathological mount layouts.
-const DISCOVER_MAX_DEPTH: usize = 32;
-
/// Parse YAML text into a config struct, validating webhook URLs, regex
/// patterns, range-bounded scalars, and endpoint formats so config errors
/// surface at the `--config` site rather than mid-scan.
@@ -440,33 +449,22 @@ fn validate(cfg: &KingfisherConfig) -> Result<()> {
.with_context(|| format!("global.endpoints[{idx}] URL is not valid"))?;
}
+ // git.github_api_url / git.gitlab_api_url — must parse as URLs.
+ if let Some(u) = &cfg.git.github_api_url {
+ url::Url::parse(u).context("git.github_api_url is not a valid URL")?;
+ }
+ if let Some(u) = &cfg.git.gitlab_api_url {
+ url::Url::parse(u).context("git.gitlab_api_url is not a valid URL")?;
+ }
+
// alerts.defaults.report_url already checked above.
Ok(())
}
-/// Walk upward from `start` looking for `kingfisher.yaml` in each ancestor
-/// directory. Returns the absolute path when found. Performs *no* file reads —
-/// the caller does the read once it has decided which file to use. Capped at
-/// [`DISCOVER_MAX_DEPTH`] levels to bound the walk on networked filesystems.
-pub fn discover_path(start: &std::path::Path) -> Option {
- let mut current = start.to_path_buf();
- for _ in 0..=DISCOVER_MAX_DEPTH {
- let candidate = current.join(DEFAULT_CONFIG_NAME);
- if candidate.is_file() {
- return Some(candidate);
- }
- if !current.pop() {
- return None;
- }
- }
- None
-}
-
#[cfg(test)]
mod tests {
use super::*;
- use tempfile::TempDir;
#[test]
fn parse_minimal_alerts() {
@@ -547,6 +545,8 @@ git:
clone_dir: "./clones"
keep_clones: true
repo_clone_limit: 50
+ github_api_url: https://ghe.example.com/api/v3/
+ gitlab_api_url: https://gitlab.example.com/
"#;
let cfg = parse_str(yaml).unwrap();
assert!(matches!(cfg.scan.confidence, Some(ConfigConfidence::High)));
@@ -569,6 +569,20 @@ git:
assert_eq!(cfg.global.endpoints.len(), 1);
assert_eq!(cfg.git.clone_dir.as_deref().map(|p| p.to_str().unwrap()), Some("./clones"));
assert_eq!(cfg.git.keep_clones, Some(true));
+ assert_eq!(cfg.git.github_api_url.as_deref(), Some("https://ghe.example.com/api/v3/"));
+ assert_eq!(cfg.git.gitlab_api_url.as_deref(), Some("https://gitlab.example.com/"));
+ }
+
+ #[test]
+ fn invalid_git_github_api_url_is_rejected() {
+ let err = parse_str("git:\n github_api_url: 'not_a_url'\n").unwrap_err();
+ assert!(format!("{err:#}").contains("git.github_api_url"));
+ }
+
+ #[test]
+ fn invalid_git_gitlab_api_url_is_rejected() {
+ let err = parse_str("git:\n gitlab_api_url: 'also not a url'\n").unwrap_err();
+ assert!(format!("{err:#}").contains("git.gitlab_api_url"));
}
#[test]
@@ -700,25 +714,4 @@ git: {}
let err = parse_str("global:\n endpoints:\n - '=https://example.com/'\n").unwrap_err();
assert!(format!("{err:#}").contains("global.endpoints[0]"));
}
-
- #[test]
- fn discover_walks_upward() {
- let temp = TempDir::new().unwrap();
- let nested = temp.path().join("a/b/c");
- std::fs::create_dir_all(&nested).unwrap();
- let cfg_path = temp.path().join(DEFAULT_CONFIG_NAME);
- std::fs::write(&cfg_path, "alerts: { webhooks: [] }\n").unwrap();
- let found = discover_path(&nested).unwrap();
- assert_eq!(
- std::fs::canonicalize(&found).unwrap(),
- std::fs::canonicalize(&cfg_path).unwrap()
- );
- }
-
- #[test]
- fn discover_returns_none_when_absent() {
- let temp = TempDir::new().unwrap();
- let found = discover_path(temp.path());
- assert!(found.is_none());
- }
}
diff --git a/src/cli/global.rs b/src/cli/global.rs
index ddb852e..cceca20 100644
--- a/src/cli/global.rs
+++ b/src/cli/global.rs
@@ -170,9 +170,10 @@ pub struct GlobalArgs {
pub endpoint_config: Option,
/// Path to a `kingfisher.yaml` project config file.
- /// If omitted, Kingfisher walks up from the current working directory
- /// looking for `kingfisher.yaml`. List-typed config values are concatenated
- /// onto matching CLI flags; scalar config values are applied only when the
+ ///
+ /// **No auto-discovery** — the file is loaded only when this flag is
+ /// passed explicitly. List-typed config values are concatenated onto
+ /// matching CLI flags; scalar config values are applied only when the
/// matching `--flag` was not passed (precedence: CLI > env > config >
/// built-in default). See `docs/CONFIG.md` for the full schema.
#[arg(global = true, long = "config", value_name = "FILE")]
diff --git a/src/main.rs b/src/main.rs
index c66e698..dd04318 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -298,31 +298,24 @@ fn setup_logging(global_args: &GlobalArgs) {
}
/// Resolve and read a `kingfisher.yaml` project config.
-/// - Explicit `--config ` is required to exist; missing/unreadable is an error.
-/// - Otherwise we walk up from CWD looking for `kingfisher.yaml`. Missing is fine.
+///
+/// The config file is loaded **only** when the user passes `--config <FILE>`
+/// explicitly. There is intentionally no auto-discovery — relying on a
+/// `kingfisher.yaml` that happens to sit in the cwd (or any ancestor
+/// directory) makes scan results depend on where the binary was invoked
+/// from, which is too easy to get wrong in CI. If the explicit path is
+/// missing or fails to parse, that is a fatal error.
fn load_project_config(
explicit: Option<&std::path::Path>,
) -> Result