From 6f06b1acb3da3aa94f578ea212f6e56756c46253 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Fri, 22 Aug 2025 13:26:54 -0700 Subject: [PATCH 01/12] Improved AWS rule --- CHANGELOG.md | 3 +++ Cargo.toml | 2 +- data/rules/aws.yml | 6 +++--- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index db2e20b..84613ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ All notable changes to this project will be documented in this file. +## [1.46.0] +- Improved AWS rule + ## [1.45.0] - Added `--repo-artifacts` flag to scan repository issues, gists/snippets, and wikis when cloning via `--git-url` - Added rules for sendbird, mattermost, langchain, notion diff --git a/Cargo.toml b/Cargo.toml index 9ebd262..b906c05 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.45.0" +version = "1.46.0" description = "MongoDB's blazingly fast secret scanning and validation tool" edition.workspace = true rust-version.workspace = true diff --git a/data/rules/aws.yml b/data/rules/aws.yml index 49fcbe3..62041da 100644 --- a/data/rules/aws.yml +++ b/data/rules/aws.yml @@ -5,7 +5,7 @@ rules: (?xi) \b ( - (?:AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA) + (?:A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA) [2-7A-Z]{16} ) \b @@ -21,7 +21,7 @@ rules: (?xi) (?: \b - (?:AWS|AMAZON|AMZN|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA) + (?:AWS|AMAZON|AMZN|A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA) (?:.|[\n\r]){0,32}? \b ( @@ -29,7 +29,7 @@ rules: ) \b | - \b(?:AWS|AMAZON|AMZN|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA) + \b(?:AWS|AMAZON|AMZN|A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA) (?:.|[\n\r]){0,96}? (?:SECRET|PRIVATE|ACCESS) (?:.|[\n\r]){0,16}? 
From 96293385f534575f782815cadf6bf72df4273aa4 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Fri, 22 Aug 2025 16:16:00 -0700 Subject: [PATCH 02/12] - Improved rules: AWS, pem --- CHANGELOG.md | 2 +- data/rules/pem.yml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 84613ca..e6a03df 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ All notable changes to this project will be documented in this file. ## [1.46.0] -- Improved AWS rule +- Improved rules: AWS, pem ## [1.45.0] - Added `--repo-artifacts` flag to scan repository issues, gists/snippets, and wikis when cloning via `--git-url` diff --git a/data/rules/pem.yml b/data/rules/pem.yml index 390171d..00d93c6 100644 --- a/data/rules/pem.yml +++ b/data/rules/pem.yml @@ -55,6 +55,7 @@ rules: (?: LS0tLS1CRUdJTiBSU0EgUFJJVkFURSBLRVktLS0t (?# prefix of base64 encoding of `-----BEGIN RSA PRIVATE KEY-----` ) | LS0tLS1CRUdJTiBEU0EgUFJJVkFURSBLRVktLS0t (?# prefix of base64 encoding of `-----BEGIN DSA PRIVATE KEY-----` ) | LS0tLS1CRUdJTiBFQyBQUklWQVRFIEtFWS0tLS0t (?# prefix of base64 encoding of `-----BEGIN EC PRIVATE KEY-----` ) + | LS0tLS1CRUdJTiBQUklWQVRFIEtFWS0tLS0t (?# prefix of base64 encoding of `-----BEGIN PRIVATE KEY-----` ) ) [a-zA-Z0-9+/=]{50,} ) From 2f1385f5f1ff58a3ad6391f55afc80e94aad6b66 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Fri, 22 Aug 2025 17:26:48 -0700 Subject: [PATCH 03/12] Added a new install-precommit subcommand that installs a git pre-commit hook, prompting or accepting --global/--repo flags to control scope and configuring the hook to run kingfisher --quiet --only-valid --no-update-check --- CHANGELOG.md | 1 + README.md | 8 + src/cli/commands/mod.rs | 1 + src/cli/commands/precommit.rs | 265 ++++++++++++++++++++++++++++++++++ src/cli/global.rs | 7 +- src/main.rs | 5 + 6 files changed, 286 insertions(+), 1 deletion(-) create mode 100644 src/cli/commands/precommit.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index e6a03df..e666552 
100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ All notable changes to this project will be documented in this file. ## [1.46.0] - Improved rules: AWS, pem +- Added a new precommit subcommand that installs a git pre-commit hook, prompting or accepting --global/--repo flags to control scope and configuring the hook to run kingfisher --quiet --only-valid --no-update-check ## [1.45.0] - Added `--repo-artifacts` flag to scan repository issues, gists/snippets, and wikis when cloning via `--git-url` diff --git a/README.md b/README.md index 56e5f18..272c67a 100644 --- a/README.md +++ b/README.md @@ -93,6 +93,14 @@ make darwin-all # builds both x64 and arm64 make all # builds for every OS and architecture supported ``` +### Install as a Git pre-commit hook + +Run `kingfisher precommit --install` to set up a Git pre-commit hook that runs +`kingfisher --quiet --only-valid --no-update-check` before each commit. +Use `--global` to operate on all repositories or `--repo` to target only the +current repository without prompting. Remove the hook with +`kingfisher precommit --remove`. 
+ ### Run Kingfisher in Docker Run the dockerized Kingfisher container: diff --git a/src/cli/commands/mod.rs b/src/cli/commands/mod.rs index c73ec82..ec48c0f 100644 --- a/src/cli/commands/mod.rs +++ b/src/cli/commands/mod.rs @@ -2,5 +2,6 @@ pub mod github; pub mod gitlab; pub mod inputs; pub mod output; +pub mod precommit; pub mod rules; pub mod scan; diff --git a/src/cli/commands/precommit.rs b/src/cli/commands/precommit.rs new file mode 100644 index 0000000..a6a1f0a --- /dev/null +++ b/src/cli/commands/precommit.rs @@ -0,0 +1,265 @@ +use std::io::{self, Write}; +use std::path::{Path, PathBuf}; +use std::process::Command; +use std::{env, fs}; + +use anyhow::{anyhow, Context, Result}; +use clap::{ArgAction, ArgGroup, Args}; + +use crate::gix; + +/// Arguments for `precommit` command +#[derive(Args, Debug, Clone)] +#[command(group( + ArgGroup::new("action") + .args(["install", "remove"]) + .required(true) + .multiple(false) +))] +pub struct PrecommitArgs { + /// Install the pre-commit hook + #[arg(long, action = ArgAction::SetTrue, conflicts_with = "remove")] + pub install: bool, + + /// Remove the pre-commit hook + #[arg(long, action = ArgAction::SetTrue, conflicts_with = "install")] + pub remove: bool, + + /// Operate on all repositories using the global hooks directory + #[arg(long, conflicts_with = "repo")] + pub global: bool, + + /// Operate only on the current repository + #[arg(long, conflicts_with = "global")] + pub repo: bool, +} + +/// Scope of operation +enum Scope { + Global, + Repo, +} + +/// Run the `precommit` command +pub fn run(args: &PrecommitArgs) -> Result<()> { + if args.install { + if let Some(path) = find_existing_hook()? 
{ + println!("Kingfisher pre-commit hook already installed at {}", path.display()); + return Ok(()); + } + let scope = determine_scope(args, true)?; + let hook_path = match scope { + Scope::Global => install_global()?, + Scope::Repo => install_repo()?, + }; + println!("Installed Kingfisher pre-commit hook at {}", hook_path.display()); + } else if args.remove { + let scope = determine_scope(args, false)?; + let removed = match scope { + Scope::Global => remove_global()?, + Scope::Repo => remove_repo()?, + }; + if let Some(path) = removed { + println!("Removed Kingfisher pre-commit hook from {}", path.display()); + } else { + println!("No Kingfisher pre-commit hook found to remove"); + } + } + Ok(()) +} + +fn determine_scope(args: &PrecommitArgs, installing: bool) -> Result { + if args.global { + Ok(Scope::Global) + } else if args.repo { + Ok(Scope::Repo) + } else { + let verb = if installing { "Install" } else { "Remove" }; + prompt_scope(verb) + } +} + +fn prompt_scope(action: &str) -> Result { + print!("{} pre-commit hook globally? [y/N]: ", action); + io::stdout().flush()?; + let mut input = String::new(); + io::stdin().read_line(&mut input)?; + if matches!(input.trim().to_lowercase().as_str(), "y" | "yes") { + Ok(Scope::Global) + } else { + Ok(Scope::Repo) + } +} + +fn find_existing_hook() -> Result> { + // Check repo-local hook + if let Ok(repo) = gix::discover(".") { + let path = repo.path().join("hooks").join(hook_filename()); + if hook_contains_kingfisher(&path) { + return Ok(Some(path)); + } + } + + // Check global hook + if let Some(dir) = current_global_hooks_dir()? 
{ + let path = dir.join(hook_filename()); + if hook_contains_kingfisher(&path) { + return Ok(Some(path)); + } + } + + Ok(None) +} + +fn install_repo() -> Result { + let repo = gix::discover(".").context("Not inside a git repository")?; + let hooks_dir = repo.path().join("hooks"); + fs::create_dir_all(&hooks_dir)?; + let hook_path = hooks_dir.join(hook_filename()); + write_hook(&hook_path)?; + Ok(hook_path) +} + +fn install_global() -> Result { + let hooks_dir = get_or_set_global_hooks_dir()?; + let hook_path = hooks_dir.join(hook_filename()); + write_hook(&hook_path)?; + Ok(hook_path) +} + +fn remove_repo() -> Result> { + let repo = gix::discover(".").context("Not inside a git repository")?; + let hook_path = repo.path().join("hooks").join(hook_filename()); + if remove_hook(&hook_path)? { + Ok(Some(hook_path)) + } else { + Ok(None) + } +} + +fn remove_global() -> Result> { + if let Some(dir) = current_global_hooks_dir()? { + let hook_path = dir.join(hook_filename()); + if remove_hook(&hook_path)? 
{ + return Ok(Some(hook_path)); + } + } + Ok(None) +} + +fn write_hook(path: &Path) -> Result<()> { + if path.exists() { + let content = fs::read_to_string(path)?; + if content.contains("kingfisher") { + println!("Kingfisher pre-commit hook already installed at {}", path.display()); + return Ok(()); + } + let mut file = fs::OpenOptions::new().append(true).open(path)?; + if !content.ends_with('\n') { + writeln!(file)?; + } + writeln!(file, "{}", hook_call_line())?; + } else { + fs::write(path, hook_content())?; + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + let mut perms = fs::metadata(path)?.permissions(); + perms.set_mode(0o755); + fs::set_permissions(path, perms)?; + } + } + Ok(()) +} + +fn remove_hook(path: &Path) -> Result { + if !path.exists() { + return Ok(false); + } + let content = fs::read_to_string(path)?; + if !content.contains("kingfisher") { + return Ok(false); + } + let ending = if cfg!(windows) { "\r\n" } else { "\n" }; + let lines: Vec<&str> = content.lines().filter(|l| !l.contains("kingfisher")).collect(); + if lines.is_empty() { + fs::remove_file(path)?; + } else { + let mut new_content = lines.join(ending); + new_content.push_str(ending); + fs::write(path, new_content)?; + } + Ok(true) +} + +fn hook_contains_kingfisher(path: &Path) -> bool { + fs::read_to_string(path).map(|c| c.contains("kingfisher")).unwrap_or(false) +} + +fn hook_filename() -> &'static str { + if cfg!(windows) { + "pre-commit.bat" + } else { + "pre-commit" + } +} + +fn hook_content() -> String { + if cfg!(windows) { + format!("@echo off\r\n{}\r\n", hook_call_line()) + } else { + format!("#!/bin/sh\n{}\n", hook_call_line()) + } +} + +fn hook_call_line() -> String { + if cfg!(windows) { + "kingfisher --quiet --only-valid --no-update-check %*".to_string() + } else { + "kingfisher --quiet --only-valid --no-update-check \"$@\"".to_string() + } +} + +fn current_global_hooks_dir() -> Result> { + let output = + Command::new("git").args(["config", "--global", "--get", 
"core.hooksPath"]).output()?; + if output.status.success() { + let p = String::from_utf8_lossy(&output.stdout).trim().to_string(); + if p.is_empty() { + Ok(None) + } else { + Ok(Some(PathBuf::from(p))) + } + } else { + Ok(None) + } +} + +fn get_or_set_global_hooks_dir() -> Result { + if let Some(dir) = current_global_hooks_dir()? { + fs::create_dir_all(&dir)?; + return Ok(dir); + } + + let home = home_dir().ok_or_else(|| anyhow!("Unable to determine home directory"))?; + let hooks = home.join(".githooks"); + fs::create_dir_all(&hooks)?; + Command::new("git") + .args([ + "config", + "--global", + "core.hooksPath", + hooks.to_str().ok_or_else(|| anyhow!("Invalid path"))?, + ]) + .status() + .context("Failed to set git global core.hooksPath")?; + Ok(hooks) +} + +fn home_dir() -> Option { + if cfg!(windows) { + env::var_os("USERPROFILE").map(PathBuf::from) + } else { + env::var_os("HOME").map(PathBuf::from) + } +} diff --git a/src/cli/global.rs b/src/cli/global.rs index 93599b7..4116ddd 100644 --- a/src/cli/global.rs +++ b/src/cli/global.rs @@ -7,7 +7,8 @@ use sysinfo::{MemoryRefreshKind, RefreshKind, System}; use tracing::Level; use crate::cli::commands::{ - github::GitHubArgs, gitlab::GitLabArgs, rules::RulesArgs, scan::ScanArgs, + github::GitHubArgs, gitlab::GitLabArgs, precommit::PrecommitArgs, rules::RulesArgs, + scan::ScanArgs, }; #[deny(missing_docs)] @@ -62,6 +63,10 @@ pub enum Command { /// Manage rules #[command(alias = "rule")] Rules(RulesArgs), + + /// Manage Kingfisher as a Git pre-commit hook + #[command(name = "precommit")] + Precommit(PrecommitArgs), } pub static RAM_GB: Lazy> = Lazy::new(|| { diff --git a/src/main.rs b/src/main.rs index 38c0a88..7ef0e9b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -69,6 +69,7 @@ use tracing_subscriber::{ use url::Url; use crate::cli::commands::gitlab::{GitLabCommand, GitLabRepoType, GitLabReposCommand}; +use crate::cli::commands::precommit; fn main() -> anyhow::Result<()> { color_backtrace::install(); @@ -81,6 +82,7 
@@ fn main() -> anyhow::Result<()> { Command::GitHub(_) => num_cpus::get(), // Default for GitHub commands Command::GitLab(_) => num_cpus::get(), // Default for GitLab commands Command::Rules(_) => num_cpus::get(), // Default for Rules commands + Command::Precommit(_) => num_cpus::get(), }; // Set up the Tokio runtime with the specified number of threads @@ -219,6 +221,9 @@ async fn async_main(args: CommandLineArgs) -> Result<()> { run_rules_list(&list_args)?; } }, + Command::Precommit(pre_args) => { + precommit::run(&pre_args)?; + } Command::GitHub(github_args) => match github_args.command { GitHubCommand::Repos(repos_command) => match repos_command { GitHubReposCommand::List(list_args) => { From 7f3846c8e73977b38458d2ad5a49c4e87e258b9f Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Fri, 22 Aug 2025 17:33:03 -0700 Subject: [PATCH 04/12] Added a new install-precommit subcommand that installs a git pre-commit hook, prompting or accepting --global/--repo flags to control scope and configuring the hook to run kingfisher --quiet --only-valid --no-update-check --- README.md | 2 +- src/cli/commands/precommit.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 272c67a..32eaac2 100644 --- a/README.md +++ b/README.md @@ -96,7 +96,7 @@ make all # builds for every OS and architecture supported ### Install as a Git pre-commit hook Run `kingfisher precommit --install` to set up a Git pre-commit hook that runs -`kingfisher --quiet --only-valid --no-update-check` before each commit. +`kingfisher scan --quiet --only-valid --no-update-check` before each commit. Use `--global` to operate on all repositories or `--repo` to target only the current repository without prompting. Remove the hook with `kingfisher precommit --remove`. 
diff --git a/src/cli/commands/precommit.rs b/src/cli/commands/precommit.rs index a6a1f0a..4589d18 100644 --- a/src/cli/commands/precommit.rs +++ b/src/cli/commands/precommit.rs @@ -214,9 +214,9 @@ fn hook_content() -> String { fn hook_call_line() -> String { if cfg!(windows) { - "kingfisher --quiet --only-valid --no-update-check %*".to_string() + "kingfisher scan --quiet --only-valid --no-update-check %*".to_string() } else { - "kingfisher --quiet --only-valid --no-update-check \"$@\"".to_string() + "kingfisher scan --quiet --only-valid --no-update-check \"$@\"".to_string() } } From bbbb0f33bb56119ce689aa7dc37b7b2d2e61326e Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 26 Aug 2025 10:22:18 -0700 Subject: [PATCH 05/12] added ollama rule --- CHANGELOG.md | 1 + data/rules/mailgun.yml | 4 ++-- data/rules/ollama.yml | 47 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 2 deletions(-) create mode 100644 data/rules/ollama.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index e666552..dc969e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ All notable changes to this project will be documented in this file. ## [1.46.0] - Improved rules: AWS, pem +- Added rule for Ollama - Added a new precommit subcommand that installs a git pre-commit hook, prompting or accepting --global/--repo flags to control scope and configuring the hook to run kingfisher --quiet --only-valid --no-update-check ## [1.45.0] diff --git a/data/rules/mailgun.yml b/data/rules/mailgun.yml index c17c40b..06a02c2 100644 --- a/data/rules/mailgun.yml +++ b/data/rules/mailgun.yml @@ -2,8 +2,8 @@ rules: - name: MailGun Token id: kingfisher.mailgun.1 pattern: | - (?xi) - \b + (?xi) + \b mailgun (?:.|[\n\r]){0,32}? 
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) diff --git a/data/rules/ollama.yml b/data/rules/ollama.yml new file mode 100644 index 0000000..a2df4ab --- /dev/null +++ b/data/rules/ollama.yml @@ -0,0 +1,47 @@ +rules: + - name: Ollama API Key + id: kingfisher.ollama.1 + pattern: | + (?xi) + \b + ollama + (?:.|[\n\r]){0,32}? + \b + ( + [a-f0-9]{32}\.[a-zA-Z0-9_-]{24} + ) + confidence: medium + min_entropy: 3.5 + validation: + type: Http + content: + request: + method: POST + url: https://ollama.com/api/generate + headers: + Content-Type: application/json + # Turbo keys are sent as the raw value in Authorization (no "Bearer " prefix) + # per working client behavior. + Authorization: "{{ TOKEN }}" + body: | + { + "model": "gpt-oss:20b", + "prompt": "ping", + "stream": false + } + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: + - '"response":' + - '"done":true' + references: + - https://ollama.com/blog/turbo + examples: + - "ollama key = 8bcdd9b4e28e4e1b8bf14a2eb8701220.QH5p5TU2BDwzHu5_RCtvJXsj" + - "ollama key = e56714bd7c1146e4b4801244bc2bc67a.3GAswjZGZ5YY6Qdgt0xg56vM" + - "ollama key = 872658d00c284033a707abf1725d4b6c.-4JpTp0dQHmf0nb89xI-wgP-" + - "ollama key = 0c4e6bf1222c4ffc87025a7a9ffd5cac.z-fgt1JO9-LadzA2cL23qLH3" + - "ollama key = dae874a007d442cdb807910c4c57c6f5.B_aHUSdeAe42UR-X41StUFJq" \ No newline at end of file From 8135bf6b3773bb41d633b120c14f8523c68e94f2 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Wed, 27 Aug 2025 10:20:04 -0700 Subject: [PATCH 06/12] Added rule for 'weights and biases' --- CHANGELOG.md | 2 +- data/rules/weightsandbiases.yml | 37 +++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 data/rules/weightsandbiases.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index dc969e4..8ffb1e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file. 
## [1.46.0] - Improved rules: AWS, pem -- Added rule for Ollama +- Added rule for Ollama, Weights and Biases - Added a new precommit subcommand that installs a git pre-commit hook, prompting or accepting --global/--repo flags to control scope and configuring the hook to run kingfisher --quiet --only-valid --no-update-check ## [1.45.0] diff --git a/data/rules/weightsandbiases.yml b/data/rules/weightsandbiases.yml new file mode 100644 index 0000000..6661f53 --- /dev/null +++ b/data/rules/weightsandbiases.yml @@ -0,0 +1,37 @@ +rules: + - name: Weights and Biases API Key + id: kingfisher.wandb.1 + pattern: | + (?xi) + \b + (?:wandb|weightsandbiases) + (?:.|[\n\r]){0,32}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,16}? + ( + [a-f0-9]{40} + ) + \b + confidence: medium + min_entropy: 3.5 + examples: + - "export WANDB_API_KEY=872ab943740b34157041da2529fb160d89632710" + - "wandb_api_key: 1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b" + - "WeightsandBiases token => 7f9d2e34c1a0b5d6e7f81234abcd5678ef901234" + validation: + type: Http + content: + request: + method: POST + url: "https://api.wandb.ai/graphql" + headers: + Authorization: "Basic {{ 'api:' | append: TOKEN | b64enc }}" + Content-Type: "application/json" + body: | + {"query":"query { viewer { id } }"} + response_matcher: + - report_response: true + - type: JsonValid + - type: WordMatch + words: + - '"id"' From 49640c533894229a9cf2d2b14e27ddd344f8bd71 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Wed, 27 Aug 2025 11:25:39 -0700 Subject: [PATCH 07/12] added rules for cerebras, friendli, fireworks.ai --- CHANGELOG.md | 2 +- data/rules/cerebras.yml | 36 ++++++++++++++++++++++++++++++++++++ data/rules/fireworksai.yml | 35 +++++++++++++++++++++++++++++++++++ data/rules/friendli.yml | 35 +++++++++++++++++++++++++++++++++++ data/rules/nvidia.yml | 0 5 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 data/rules/cerebras.yml create mode 100644 data/rules/fireworksai.yml create mode 100644 
data/rules/friendli.yml create mode 100644 data/rules/nvidia.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 8ffb1e7..308ed0d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file. ## [1.46.0] - Improved rules: AWS, pem -- Added rule for Ollama, Weights and Biases +- Added rule for Ollama, Weights and Biases, Cerebras, Friendli, Fireworks.ai - Added a new precommit subcommand that installs a git pre-commit hook, prompting or accepting --global/--repo flags to control scope and configuring the hook to run kingfisher --quiet --only-valid --no-update-check ## [1.45.0] diff --git a/data/rules/cerebras.yml b/data/rules/cerebras.yml new file mode 100644 index 0000000..bb5ad17 --- /dev/null +++ b/data/rules/cerebras.yml @@ -0,0 +1,36 @@ +rules: + - name: Cerebras AI API Key + id: kingfisher.cerebras.1 + pattern: | + (?xi) + \b + ( + csk-[a-z0-9]{48} + ) + \b + confidence: medium + min_entropy: 3.0 + validation: + type: Http + content: + request: + method: GET + url: "https://api.cerebras.ai/v1/models" + headers: + Authorization: "Bearer {{TOKEN}}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: + - '"object"' + - '"data"' + match_all_words: true + references: + - https://docs.cerebras.net/ + examples: + - "csk-6nptf4w5cx36fw58t3hkx48jvm52wm693pex5tjm29kn55yt" + - "csk-e2knhj8h3h4erp6crfx6rh52tvecj4xnwmtjf3mtrvtt54et" + - "csk-rhw8npjrp6kpv9phm55n5nv5rkkm4492jepx3yh65dc9cwe9" + - "csk-w6p3nxk3`c5249mrpmv642fffert28rwdkepffrpn8rtfr9h" diff --git a/data/rules/fireworksai.yml b/data/rules/fireworksai.yml new file mode 100644 index 0000000..9ca431f --- /dev/null +++ b/data/rules/fireworksai.yml @@ -0,0 +1,35 @@ +rules: + - name: Fireworks.ai API Key + id: kingfisher.fireworks.1 + pattern: | + (?xi) + \b + ( + fw_[A-Z0-9]{24} + ) + \b + confidence: medium + min_entropy: 3.5 + validation: + type: Http + content: + request: + method: GET + 
url: "https://api.fireworks.ai/inference/v1/models" + headers: + Authorization: "Bearer {{TOKEN}}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: + - '"owned_by"' + - '"data"' + match_all_words: true + references: + - https://readme.fireworks.ai/reference/getting-started-with-the-api + examples: + - "fw_3ZL5ji26Tp7baYrW5S2pA5xi" + - "fw_3ZaW5fSpx5GTnHpRGb8CPu2V" + - "fw_3ZSU8ymvmZ38YPv8uwbZHAyW" diff --git a/data/rules/friendli.yml b/data/rules/friendli.yml new file mode 100644 index 0000000..ec5f3ec --- /dev/null +++ b/data/rules/friendli.yml @@ -0,0 +1,35 @@ +rules: + - name: Friendli.ai API Key + id: kingfisher.friendli.1 + pattern: | + (?xi) + \b + ( + flp_[A-Z0-9]{46} + ) + \b + confidence: medium + min_entropy: 3.0 + validation: + type: Http + content: + request: + method: GET + url: "https://api.friendli.ai/dedicated/beta/endpoint" + headers: + Authorization: "Bearer {{ TOKEN }}" + Content-Type: "application/json" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: + - '"data"' + - '"status"' + references: + - https://docs.friendli.ai/reference/authentication + examples: + - "flp_eb8CAc1OHdVISFraFZXFYQeH1CYtqM2VdYFvV1duniWw32" + - "flp_fYvncz2Ahh4YEfSKbNoT09DWlwPq5I7svZG2l1bdbpOg1c" + - "flp_kGcjWhZQ4zYQnY7b3O6nukAhflKZJeS7pNDhs79IRrfodc" diff --git a/data/rules/nvidia.yml b/data/rules/nvidia.yml new file mode 100644 index 0000000..e69de29 From 4194b013066074a78d201737c6de60f7c1294fd5 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Wed, 27 Aug 2025 11:39:32 -0700 Subject: [PATCH 08/12] added rules for nvidia nim --- CHANGELOG.md | 2 +- data/rules/nvidia.yml | 31 +++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 308ed0d..933d500 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file. 
## [1.46.0] - Improved rules: AWS, pem -- Added rule for Ollama, Weights and Biases, Cerebras, Friendli, Fireworks.ai +- Added rule for Ollama, Weights and Biases, Cerebras, Friendli, Fireworks.ai, NVIDIA NIM - Added a new precommit subcommand that installs a git pre-commit hook, prompting or accepting --global/--repo flags to control scope and configuring the hook to run kingfisher --quiet --only-valid --no-update-check ## [1.45.0] diff --git a/data/rules/nvidia.yml b/data/rules/nvidia.yml index e69de29..25a90e1 100644 --- a/data/rules/nvidia.yml +++ b/data/rules/nvidia.yml @@ -0,0 +1,31 @@ +rules: + - name: NVIDIA NIM API Key + id: kingfisher.nvidia.nim.1 + pattern: | + (?xi) + \b + ( + nvapi-[A-Z0-9_-]{60,70} + ) + \b + confidence: medium + min_entropy: 3.5 + examples: + - "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" + - "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" + - "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" + validation: + type: Http + content: + request: + method: GET + url: "https://api.nvcf.nvidia.com/v2/nvcf/functions" + headers: + Authorization: "Bearer {{ TOKEN }}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + expected: ["application/json"] + - type: WordMatch + words: ["id", "versionId"] From d1bd843567066e1e3805e408f9cc9c1bd12d67b9 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Wed, 27 Aug 2025 12:20:44 -0700 Subject: [PATCH 09/12] added rules for together.ai --- CHANGELOG.md | 2 +- data/rules/togetherai.yml | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 data/rules/togetherai.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 933d500..330414a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file. 
## [1.46.0] - Improved rules: AWS, pem -- Added rule for Ollama, Weights and Biases, Cerebras, Friendli, Fireworks.ai, NVIDIA NIM +- Added rule for Ollama, Weights and Biases, Cerebras, Friendli, Fireworks.ai, NVIDIA NIM, together.ai - Added a new precommit subcommand that installs a git pre-commit hook, prompting or accepting --global/--repo flags to control scope and configuring the hook to run kingfisher --quiet --only-valid --no-update-check ## [1.45.0] diff --git a/data/rules/togetherai.yml b/data/rules/togetherai.yml new file mode 100644 index 0000000..ee43097 --- /dev/null +++ b/data/rules/togetherai.yml @@ -0,0 +1,36 @@ +rules: + - name: Together.ai API Key + id: kingfisher.together.1 + pattern: | + (?xi) + \b + ( + tgp_v1_[A-Z0-9_-]{43} + ) + confidence: medium + min_entropy: 3.0 + examples: + - tgp_v1_Tctm6OfOeNkwLIKkyxJxUHIqNKx2AvFr65tQRIOMgzY + - tgp_v1_HgWU7iym2128y2Pdj-7-9kX4W_MSCcIT5EhuY_SmNqc + - tgp_v1_xeybrcbPy2c10JR9eAlkOq1qvPaBXT3ZbXp8yKq1VME + - tgp_v1_yanBH3171P6HAZ01LbzSDlnOiXM3lo_89kG2Gg5yzko + validation: + type: Http + content: + request: + method: GET + url: "https://api.together.xyz/v1/models" + headers: + Authorization: "Bearer {{ TOKEN }}" + Accept: "application/json" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: + - '"id":' + - '"object":' + references: + - https://docs.together.ai/reference/authentication + - https://docs.together.ai/reference/models-list From c2de3bc25ce49c89e27f9def423d6be738ad0bb8 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Wed, 27 Aug 2025 12:43:41 -0700 Subject: [PATCH 10/12] added rules for zhipu --- CHANGELOG.md | 4 +- README.md | 8 - data/rules/zhipu.yml | 34 +++++ src/cli/commands/mod.rs | 1 - src/cli/commands/precommit.rs | 265 ---------------------------------- src/cli/global.rs | 6 +- src/main.rs | 5 - 7 files changed, 37 insertions(+), 286 deletions(-) create mode 100644 data/rules/zhipu.yml delete mode 100644 
src/cli/commands/precommit.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 330414a..639e9d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,8 +4,8 @@ All notable changes to this project will be documented in this file. ## [1.46.0] - Improved rules: AWS, pem -- Added rule for Ollama, Weights and Biases, Cerebras, Friendli, Fireworks.ai, NVIDIA NIM, together.ai -- Added a new precommit subcommand that installs a git pre-commit hook, prompting or accepting --global/--repo flags to control scope and configuring the hook to run kingfisher --quiet --only-valid --no-update-check +- Added rule for Ollama, Weights and Biases, Cerebras, Friendli, Fireworks.ai, NVIDIA NIM, together.ai, zhipu, + ## [1.45.0] - Added `--repo-artifacts` flag to scan repository issues, gists/snippets, and wikis when cloning via `--git-url` diff --git a/README.md b/README.md index 32eaac2..56e5f18 100644 --- a/README.md +++ b/README.md @@ -93,14 +93,6 @@ make darwin-all # builds both x64 and arm64 make all # builds for every OS and architecture supported ``` -### Install as a Git pre-commit hook - -Run `kingfisher precommit --install` to set up a Git pre-commit hook that runs -`kingfisher scan --quiet --only-valid --no-update-check` before each commit. -Use `--global` to operate on all repositories or `--repo` to target only the -current repository without prompting. Remove the hook with -`kingfisher precommit --remove`. - ### Run Kingfisher in Docker Run the dockerized Kingfisher container: diff --git a/data/rules/zhipu.yml b/data/rules/zhipu.yml new file mode 100644 index 0000000..bc1bffd --- /dev/null +++ b/data/rules/zhipu.yml @@ -0,0 +1,34 @@ +rules: + - name: Zhipu (BigModel) API Key + id: kingfisher.zhipu.1 + pattern: | + (?xi) + \b + ( + [A-F0-9]{32} + \. 
+ [A-Z0-9]{16} + ) + \b + confidence: medium + min_entropy: 4.0 + examples: + - "3494c505cf244a3fb17417d6894d404c.LLSZ2InjarUXEhNr" + - "a64cb6a9b4e840919351d041dbe65654.eh1YZt0SAhSTOsNR" + - "4d140d7d21c4477ab20d5090e530496c.A5pEbmgcid2deKNA" + validation: + type: Http + content: + request: + method: GET + url: "https://open.bigmodel.cn/api/paas/v4/files" + headers: + Authorization: "Bearer {{ TOKEN }}" + Accept: "application/json" + timeout_seconds: 12 + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: ["object", "data"] diff --git a/src/cli/commands/mod.rs b/src/cli/commands/mod.rs index ec48c0f..c73ec82 100644 --- a/src/cli/commands/mod.rs +++ b/src/cli/commands/mod.rs @@ -2,6 +2,5 @@ pub mod github; pub mod gitlab; pub mod inputs; pub mod output; -pub mod precommit; pub mod rules; pub mod scan; diff --git a/src/cli/commands/precommit.rs b/src/cli/commands/precommit.rs deleted file mode 100644 index 4589d18..0000000 --- a/src/cli/commands/precommit.rs +++ /dev/null @@ -1,265 +0,0 @@ -use std::io::{self, Write}; -use std::path::{Path, PathBuf}; -use std::process::Command; -use std::{env, fs}; - -use anyhow::{anyhow, Context, Result}; -use clap::{ArgAction, ArgGroup, Args}; - -use crate::gix; - -/// Arguments for `precommit` command -#[derive(Args, Debug, Clone)] -#[command(group( - ArgGroup::new("action") - .args(["install", "remove"]) - .required(true) - .multiple(false) -))] -pub struct PrecommitArgs { - /// Install the pre-commit hook - #[arg(long, action = ArgAction::SetTrue, conflicts_with = "remove")] - pub install: bool, - - /// Remove the pre-commit hook - #[arg(long, action = ArgAction::SetTrue, conflicts_with = "install")] - pub remove: bool, - - /// Operate on all repositories using the global hooks directory - #[arg(long, conflicts_with = "repo")] - pub global: bool, - - /// Operate only on the current repository - #[arg(long, conflicts_with = "global")] - pub repo: bool, -} - -/// Scope 
of operation -enum Scope { - Global, - Repo, -} - -/// Run the `precommit` command -pub fn run(args: &PrecommitArgs) -> Result<()> { - if args.install { - if let Some(path) = find_existing_hook()? { - println!("Kingfisher pre-commit hook already installed at {}", path.display()); - return Ok(()); - } - let scope = determine_scope(args, true)?; - let hook_path = match scope { - Scope::Global => install_global()?, - Scope::Repo => install_repo()?, - }; - println!("Installed Kingfisher pre-commit hook at {}", hook_path.display()); - } else if args.remove { - let scope = determine_scope(args, false)?; - let removed = match scope { - Scope::Global => remove_global()?, - Scope::Repo => remove_repo()?, - }; - if let Some(path) = removed { - println!("Removed Kingfisher pre-commit hook from {}", path.display()); - } else { - println!("No Kingfisher pre-commit hook found to remove"); - } - } - Ok(()) -} - -fn determine_scope(args: &PrecommitArgs, installing: bool) -> Result { - if args.global { - Ok(Scope::Global) - } else if args.repo { - Ok(Scope::Repo) - } else { - let verb = if installing { "Install" } else { "Remove" }; - prompt_scope(verb) - } -} - -fn prompt_scope(action: &str) -> Result { - print!("{} pre-commit hook globally? [y/N]: ", action); - io::stdout().flush()?; - let mut input = String::new(); - io::stdin().read_line(&mut input)?; - if matches!(input.trim().to_lowercase().as_str(), "y" | "yes") { - Ok(Scope::Global) - } else { - Ok(Scope::Repo) - } -} - -fn find_existing_hook() -> Result> { - // Check repo-local hook - if let Ok(repo) = gix::discover(".") { - let path = repo.path().join("hooks").join(hook_filename()); - if hook_contains_kingfisher(&path) { - return Ok(Some(path)); - } - } - - // Check global hook - if let Some(dir) = current_global_hooks_dir()? 
{ - let path = dir.join(hook_filename()); - if hook_contains_kingfisher(&path) { - return Ok(Some(path)); - } - } - - Ok(None) -} - -fn install_repo() -> Result { - let repo = gix::discover(".").context("Not inside a git repository")?; - let hooks_dir = repo.path().join("hooks"); - fs::create_dir_all(&hooks_dir)?; - let hook_path = hooks_dir.join(hook_filename()); - write_hook(&hook_path)?; - Ok(hook_path) -} - -fn install_global() -> Result { - let hooks_dir = get_or_set_global_hooks_dir()?; - let hook_path = hooks_dir.join(hook_filename()); - write_hook(&hook_path)?; - Ok(hook_path) -} - -fn remove_repo() -> Result> { - let repo = gix::discover(".").context("Not inside a git repository")?; - let hook_path = repo.path().join("hooks").join(hook_filename()); - if remove_hook(&hook_path)? { - Ok(Some(hook_path)) - } else { - Ok(None) - } -} - -fn remove_global() -> Result> { - if let Some(dir) = current_global_hooks_dir()? { - let hook_path = dir.join(hook_filename()); - if remove_hook(&hook_path)? 
{ - return Ok(Some(hook_path)); - } - } - Ok(None) -} - -fn write_hook(path: &Path) -> Result<()> { - if path.exists() { - let content = fs::read_to_string(path)?; - if content.contains("kingfisher") { - println!("Kingfisher pre-commit hook already installed at {}", path.display()); - return Ok(()); - } - let mut file = fs::OpenOptions::new().append(true).open(path)?; - if !content.ends_with('\n') { - writeln!(file)?; - } - writeln!(file, "{}", hook_call_line())?; - } else { - fs::write(path, hook_content())?; - #[cfg(unix)] - { - use std::os::unix::fs::PermissionsExt; - let mut perms = fs::metadata(path)?.permissions(); - perms.set_mode(0o755); - fs::set_permissions(path, perms)?; - } - } - Ok(()) -} - -fn remove_hook(path: &Path) -> Result { - if !path.exists() { - return Ok(false); - } - let content = fs::read_to_string(path)?; - if !content.contains("kingfisher") { - return Ok(false); - } - let ending = if cfg!(windows) { "\r\n" } else { "\n" }; - let lines: Vec<&str> = content.lines().filter(|l| !l.contains("kingfisher")).collect(); - if lines.is_empty() { - fs::remove_file(path)?; - } else { - let mut new_content = lines.join(ending); - new_content.push_str(ending); - fs::write(path, new_content)?; - } - Ok(true) -} - -fn hook_contains_kingfisher(path: &Path) -> bool { - fs::read_to_string(path).map(|c| c.contains("kingfisher")).unwrap_or(false) -} - -fn hook_filename() -> &'static str { - if cfg!(windows) { - "pre-commit.bat" - } else { - "pre-commit" - } -} - -fn hook_content() -> String { - if cfg!(windows) { - format!("@echo off\r\n{}\r\n", hook_call_line()) - } else { - format!("#!/bin/sh\n{}\n", hook_call_line()) - } -} - -fn hook_call_line() -> String { - if cfg!(windows) { - "kingfisher scan --quiet --only-valid --no-update-check %*".to_string() - } else { - "kingfisher scan --quiet --only-valid --no-update-check \"$@\"".to_string() - } -} - -fn current_global_hooks_dir() -> Result> { - let output = - Command::new("git").args(["config", "--global", 
"--get", "core.hooksPath"]).output()?; - if output.status.success() { - let p = String::from_utf8_lossy(&output.stdout).trim().to_string(); - if p.is_empty() { - Ok(None) - } else { - Ok(Some(PathBuf::from(p))) - } - } else { - Ok(None) - } -} - -fn get_or_set_global_hooks_dir() -> Result { - if let Some(dir) = current_global_hooks_dir()? { - fs::create_dir_all(&dir)?; - return Ok(dir); - } - - let home = home_dir().ok_or_else(|| anyhow!("Unable to determine home directory"))?; - let hooks = home.join(".githooks"); - fs::create_dir_all(&hooks)?; - Command::new("git") - .args([ - "config", - "--global", - "core.hooksPath", - hooks.to_str().ok_or_else(|| anyhow!("Invalid path"))?, - ]) - .status() - .context("Failed to set git global core.hooksPath")?; - Ok(hooks) -} - -fn home_dir() -> Option { - if cfg!(windows) { - env::var_os("USERPROFILE").map(PathBuf::from) - } else { - env::var_os("HOME").map(PathBuf::from) - } -} diff --git a/src/cli/global.rs b/src/cli/global.rs index 4116ddd..c87e61e 100644 --- a/src/cli/global.rs +++ b/src/cli/global.rs @@ -7,7 +7,7 @@ use sysinfo::{MemoryRefreshKind, RefreshKind, System}; use tracing::Level; use crate::cli::commands::{ - github::GitHubArgs, gitlab::GitLabArgs, precommit::PrecommitArgs, rules::RulesArgs, + github::GitHubArgs, gitlab::GitLabArgs, rules::RulesArgs, scan::ScanArgs, }; @@ -63,10 +63,6 @@ pub enum Command { /// Manage rules #[command(alias = "rule")] Rules(RulesArgs), - - /// Manage Kingfisher as a Git pre-commit hook - #[command(name = "precommit")] - Precommit(PrecommitArgs), } pub static RAM_GB: Lazy> = Lazy::new(|| { diff --git a/src/main.rs b/src/main.rs index 7ef0e9b..38c0a88 100644 --- a/src/main.rs +++ b/src/main.rs @@ -69,7 +69,6 @@ use tracing_subscriber::{ use url::Url; use crate::cli::commands::gitlab::{GitLabCommand, GitLabRepoType, GitLabReposCommand}; -use crate::cli::commands::precommit; fn main() -> anyhow::Result<()> { color_backtrace::install(); @@ -82,7 +81,6 @@ fn main() -> 
anyhow::Result<()> { Command::GitHub(_) => num_cpus::get(), // Default for GitHub commands Command::GitLab(_) => num_cpus::get(), // Default for GitLab commands Command::Rules(_) => num_cpus::get(), // Default for Rules commands - Command::Precommit(_) => num_cpus::get(), }; // Set up the Tokio runtime with the specified number of threads @@ -221,9 +219,6 @@ async fn async_main(args: CommandLineArgs) -> Result<()> { run_rules_list(&list_args)?; } }, - Command::Precommit(pre_args) => { - precommit::run(&pre_args)?; - } Command::GitHub(github_args) => match github_args.command { GitHubCommand::Repos(repos_command) => match repos_command { GitHubReposCommand::List(list_args) => { From 332f2c59f9ee2ec3aa95389275da38f340768d14 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Wed, 27 Aug 2025 15:35:01 -0700 Subject: [PATCH 11/12] added top level 'self-update' cli sub command to update the binary independently. Now supports updating over homebrew managed binary --- CHANGELOG.md | 4 +- README.md | 49 +++++++----- data/rules/cerebras.yml | 2 +- src/cli/global.rs | 7 +- src/main.rs | 170 +++++++++++++++++++++------------------- src/update.rs | 36 +-------- src/validation/jwt.rs | 3 +- 7 files changed, 127 insertions(+), 144 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 639e9d2..41373bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,8 +4,8 @@ All notable changes to this project will be documented in this file. ## [1.46.0] - Improved rules: AWS, pem -- Added rule for Ollama, Weights and Biases, Cerebras, Friendli, Fireworks.ai, NVIDIA NIM, together.ai, zhipu, - +- Added rule for Ollama, Weights and Biases, Cerebras, Friendli, Fireworks.ai, NVIDIA NIM, together.ai, zhipu +- Added `self-update` command to update the binary independently. 
Now supports updating over homebrew managed binary ## [1.45.0] - Added `--repo-artifacts` flag to scan repository issues, gists/snippets, and wikis when cloning via `--git-url` diff --git a/README.md b/README.md index 56e5f18..10fdbf7 100644 --- a/README.md +++ b/README.md @@ -8,21 +8,12 @@ Kingfisher is a blazingly fast secret‑scanning and live validation tool built in Rust. It combines Intel’s hardware‑accelerated Hyperscan regex engine with language‑aware parsing via Tree‑Sitter, and **ships with hundreds of built‑in rules** to detect, validate, and triage secrets before they ever reach production

-Kingfisher originated as a fork of Praetorian's Nosey Parker, and is built atop their incredible work and the work contributed by the Nosey Parker community. - -## What Kingfisher Adds -- **Live validation** via cloud-provider APIs -- **Extra targets**: GitLab repos, S3 buckets, Docker images, Jira issues, Confluence pages, and Slack messages -- **Compressed Files**: Supports extracting and scanning compressed files for secrets -- **Baseline mode**: ignore known secrets, flag only new ones -- **Allowlist support**: suppress false positives with custom regexes or words -- **Language-aware detection** (source-code parsing) for ~20 languages -- **Native Windows** binary - +Originally forked from Praetorian’s Nosey Parker, Kingfisher adds live cloud-API validation; many more targets (GitLab, S3, Docker, Jira, Confluence, Slack); compressed-file extraction and scanning; baseline and allowlist controls; language-aware detection (~20 languages); and a native Windows binary. See [Origins and Divergence](#origins-and-divergence) for details. 
## Key Features - **Performance**: multithreaded, Hyperscan‑powered scanning built for huge codebases - **Extensible rules**: hundreds of built-in detectors plus YAML-defined custom rules ([docs/RULES.md](/docs/RULES.md)) + - **Broad AI SaaS coverage**: finds and validates tokens for OpenAI, Anthropic, Google Gemini, Cohere, Mistral, Stability AI, Replicate, xAI (Grok), Ollama, Langchain, Perplexity, Weights & Biases, Cerebras, Friendli, Fireworks.ai, NVIDIA NIM, Together.ai, Zhipu, and many more - **Multiple targets**: - **Git history**: local repos or GitHub/GitLab orgs/users - **Repository artifacts**: with `--repo-artifacts`, scan GitHub/GitLab repository artifacts such as issues, pull/merge requests, wikis, snippets, and owner gists in addition to code @@ -154,18 +145,18 @@ docker run --rm \ # 🔐 Detection Rules at a Glance -Kingfisher ships with hundreds of rules that cover everything from classic cloud keys to the latest LLM-API secrets. Below is an overview: +Kingfisher ships with [hundreds of rules](/data/rules/) that cover everything from classic cloud keys to the latest AI SaaS tokens. Below is an overview: | Category | What we catch | |----------|---------------| -| **AI / LLM APIs** | OpenAI, Anthropic, Google Gemini, Cohere, Mistral, Stability AI, Replicate, xAI (Grok), and more -| **Cloud Providers** | AWS, Azure, GCP, Alibaba Cloud, DigitalOcean, IBM Cloud, Cloudflare, and more -| **Dev & CI/CD** | GitHub/GitLab tokens, CircleCI, TravisCI, TeamCity, Docker Hub, npm, PyPI, and more -| **Messaging & Comms** | Slack, Discord, Microsoft Teams, Twilio, Mailgun, SendGrid, Mailchimp, and more -| **Databases & Data Ops** | MongoDB Atlas, PlanetScale, Postgres DSNs, Grafana Cloud, Datadog, Dynatrace, and more -| **Payments & Billing** | Stripe, PayPal, Square, GoCardless, and more -| **Security & DevSecOps** | Snyk, Dependency-Track, CodeClimate, Codacy, OpsGenie, PagerDuty, and more -| **Misc. 
SaaS & Tools** | 1Password, Adobe, Atlassian/Jira, Asana, Netlify, Baremetrics, and more +| **AI SaaS APIs** | OpenAI, Anthropic, Google Gemini, Cohere, Mistral, Stability AI, Replicate, xAI (Grok), Ollama, Langchain, Perplexity, Weights & Biases, Cerebras, Friendli, Fireworks.ai, NVIDIA NIM, together.ai, Zhipu, and more | +| **Cloud Providers** | AWS, Azure, GCP, Alibaba Cloud, DigitalOcean, IBM Cloud, Cloudflare, and more | +| **Dev & CI/CD** | GitHub/GitLab tokens, CircleCI, TravisCI, TeamCity, Docker Hub, npm, PyPI, and more | +| **Messaging & Comms** | Slack, Discord, Microsoft Teams, Twilio, Mailgun, SendGrid, Mailchimp, and more | +| **Databases & Data Ops** | MongoDB Atlas, PlanetScale, Postgres DSNs, Grafana Cloud, Datadog, Dynatrace, and more | +| **Payments & Billing** | Stripe, PayPal, Square, GoCardless, and more | +| **Security & DevSecOps** | Snyk, Dependency-Track, CodeClimate, Codacy, OpsGenie, PagerDuty, and more | +| **Misc. SaaS & Tools** | 1Password, Adobe, Atlassian/Jira, Asana, Netlify, Baremetrics, and more | ## Write Custom Rules! @@ -543,9 +534,11 @@ Kingfisher automatically queries GitHub for a newer release when it starts and t - **Hands-free updates** – Add `--self-update` to any Kingfisher command - * If a newer version exists, Kingfisher will download it, replace the running binary, and re-launch itself with the **exact same arguments**. + * If a newer version exists, Kingfisher will download it, replace the running binary, and re-launch itself with the **exact same arguments**. * If the update fails or no newer release is found, the current run proceeds as normal +- **Manual update** – Run `kingfisher self-update` to update the binary without scanning + - **Disable version checks** – Pass `--no-update-check` to skip both the startup and shutdown checks entirely # Advanced Options @@ -661,6 +654,20 @@ Use `--rule-stats` to collect timing information for every rule. 
After scanning, kingfisher scan --help ``` + +## Origins and Divergence + +Kingfisher began as a fork of Praetorian’s Nosey Parker, as our experiment with adding live validation support and embedding that validation directly inside each rule. + +Since that initial fork, it has diverged heavily from Nosey Parker: +- Replaced the SQLite datastore with an in-memory store + Bloom filter +- Collapsed the workflow into a single scan-and-report phase with direct JSON/BSON/SARIF outputs +- Added Tree-Sitter parsing on top of Hyperscan for deeper language-aware detection +- Removed datastore-driven reporting/annotations in favor of live validation, baselines, allowlists, and compressed-file extraction +- Expanded support for new targets (GitLab, Jira, Confluence, Slack, S3, Docker, etc.) +- Delivered cross-platform builds, including native Windows + + # Roadmap - More rules diff --git a/data/rules/cerebras.yml b/data/rules/cerebras.yml index bb5ad17..af0f49c 100644 --- a/data/rules/cerebras.yml +++ b/data/rules/cerebras.yml @@ -33,4 +33,4 @@ rules: - "csk-6nptf4w5cx36fw58t3hkx48jvm52wm693pex5tjm29kn55yt" - "csk-e2knhj8h3h4erp6crfx6rh52tvecj4xnwmtjf3mtrvtt54et" - "csk-rhw8npjrp6kpv9phm55n5nv5rkkm4492jepx3yh65dc9cwe9" - - "csk-w6p3nxk3`c5249mrpmv642fffert28rwdkepffrpn8rtfr9h" + - "csk-w6p3nxk3dc5249mrpmv642fffert28rwdkepffrpn8rtfr9h" diff --git a/src/cli/global.rs b/src/cli/global.rs index c87e61e..8f761de 100644 --- a/src/cli/global.rs +++ b/src/cli/global.rs @@ -7,8 +7,7 @@ use sysinfo::{MemoryRefreshKind, RefreshKind, System}; use tracing::Level; use crate::cli::commands::{ - github::GitHubArgs, gitlab::GitLabArgs, rules::RulesArgs, - scan::ScanArgs, + github::GitHubArgs, gitlab::GitLabArgs, rules::RulesArgs, scan::ScanArgs, }; #[deny(missing_docs)] @@ -63,6 +62,10 @@ pub enum Command { /// Manage rules #[command(alias = "rule")] Rules(RulesArgs), + + /// Update the Kingfisher binary + #[command(name = "self-update")] + SelfUpdate, } pub static RAM_GB: Lazy> = Lazy::new(|| 
{ diff --git a/src/main.rs b/src/main.rs index 38c0a88..15c1a8a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -78,9 +78,10 @@ fn main() -> anyhow::Result<()> { // Determine the number of jobs, defaulting to the number of CPUs let num_jobs = match args.command { Command::Scan(ref scan_args) => scan_args.num_jobs, + Command::SelfUpdate => 1, // Self-update doesn't need a thread pool Command::GitHub(_) => num_cpus::get(), // Default for GitHub commands Command::GitLab(_) => num_cpus::get(), // Default for GitLab commands - Command::Rules(_) => num_cpus::get(), // Default for Rules commands + Command::Rules(_) => num_cpus::get(), // Default for Rules commands }; // Set up the Tokio runtime with the specified number of threads @@ -171,92 +172,97 @@ pub fn determine_exit_code(datastore: &Arc> } async fn async_main(args: CommandLineArgs) -> Result<()> { - // Create a temporary directory - let temp_dir = TempDir::new().context("Failed to create temporary directory")?; - let clone_dir = temp_dir.path().to_path_buf(); - - // Create the in-memory datastore - let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); setup_logging(&args.global_args); - let update_msg = check_for_update(&args.global_args, None); + let global_args = args.global_args.clone(); + match args.command { - Command::Scan(mut scan_args) => { - // ————————————————————————————————————————— - // If no paths or a single "-", slurp stdin into a temp file - // ————————————————————————————————————————— - info!( - "Launching with {} concurrent scan jobs. 
Use --num-jobs to override.", - &scan_args.num_jobs - ); - let paths = &scan_args.input_specifier_args.path_inputs; - let is_dash = paths.iter().any(|p| p.as_os_str() == "-"); - if (paths.is_empty() || is_dash) && !atty::is(atty::Stream::Stdin) { - // read all stdin - let mut buf = Vec::new(); - std::io::stdin().read_to_end(&mut buf)?; - // write into temp_dir - let stdin_file = temp_dir.path().join("stdin_input"); - std::fs::write(&stdin_file, buf)?; - // replace inputs - scan_args.input_specifier_args.path_inputs = vec![stdin_file.into()]; - } - - // now proceed exactly as before - let rules_db = Arc::new(load_and_record_rules(&scan_args, &datastore)?); - run_scan(&args.global_args, &scan_args, &rules_db, Arc::clone(&datastore)).await?; - let exit_code = determine_exit_code(&datastore); - - if let Err(e) = temp_dir.close() { - eprintln!("Failed to close temporary directory: {}", e); - } - std::process::exit(exit_code); + Command::SelfUpdate => { + let mut g = global_args; + g.self_update = true; + g.no_update_check = false; + check_for_update(&g, None); + Ok(()) } - Command::Rules(ref rule_args) => match &rule_args.command { - RulesCommand::Check(check_args) => { - run_rules_check(&check_args)?; - } - RulesCommand::List(list_args) => { - run_rules_list(&list_args)?; - } - }, - Command::GitHub(github_args) => match github_args.command { - GitHubCommand::Repos(repos_command) => match repos_command { - GitHubReposCommand::List(list_args) => { - github::list_repositories( - github_args.github_api_url, - args.global_args.ignore_certs, - args.global_args.use_progress(), - &list_args.repo_specifiers.user, - &list_args.repo_specifiers.organization, - list_args.repo_specifiers.all_organizations, - list_args.repo_specifiers.repo_type.into(), - ) - .await?; + command => { + let temp_dir = TempDir::new().context("Failed to create temporary directory")?; + let clone_dir = temp_dir.path().to_path_buf(); + + let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); + 
let update_msg = check_for_update(&global_args, None); + match command { + Command::Scan(mut scan_args) => { + info!( + "Launching with {} concurrent scan jobs. Use --num-jobs to override.", + &scan_args.num_jobs + ); + let paths = &scan_args.input_specifier_args.path_inputs; + let is_dash = paths.iter().any(|p| p.as_os_str() == "-"); + if (paths.is_empty() || is_dash) && !atty::is(atty::Stream::Stdin) { + let mut buf = Vec::new(); + std::io::stdin().read_to_end(&mut buf)?; + let stdin_file = temp_dir.path().join("stdin_input"); + std::fs::write(&stdin_file, buf)?; + scan_args.input_specifier_args.path_inputs = vec![stdin_file.into()]; + } + + let rules_db = Arc::new(load_and_record_rules(&scan_args, &datastore)?); + run_scan(&global_args, &scan_args, &rules_db, Arc::clone(&datastore)).await?; + let exit_code = determine_exit_code(&datastore); + + if let Err(e) = temp_dir.close() { + eprintln!("Failed to close temporary directory: {}", e); + } + std::process::exit(exit_code); } - }, - }, - Command::GitLab(gitlab_args) => match gitlab_args.command { - GitLabCommand::Repos(repos_command) => match repos_command { - GitLabReposCommand::List(list_args) => { - kingfisher::gitlab::list_repositories( - gitlab_args.gitlab_api_url, - args.global_args.ignore_certs, - args.global_args.use_progress(), - &list_args.repo_specifiers.user, - &list_args.repo_specifiers.group, - list_args.repo_specifiers.all_groups, - list_args.repo_specifiers.include_subgroups, - list_args.repo_specifiers.repo_type.into(), - ) - .await?; - } - }, - }, + Command::Rules(ref rule_args) => match &rule_args.command { + RulesCommand::Check(check_args) => { + run_rules_check(&check_args)?; + } + RulesCommand::List(list_args) => { + run_rules_list(&list_args)?; + } + }, + Command::GitHub(github_args) => match github_args.command { + GitHubCommand::Repos(repos_command) => match repos_command { + GitHubReposCommand::List(list_args) => { + github::list_repositories( + github_args.github_api_url, + 
global_args.ignore_certs, + global_args.use_progress(), + &list_args.repo_specifiers.user, + &list_args.repo_specifiers.organization, + list_args.repo_specifiers.all_organizations, + list_args.repo_specifiers.repo_type.into(), + ) + .await?; + } + }, + }, + Command::GitLab(gitlab_args) => match gitlab_args.command { + GitLabCommand::Repos(repos_command) => match repos_command { + GitLabReposCommand::List(list_args) => { + kingfisher::gitlab::list_repositories( + gitlab_args.gitlab_api_url, + global_args.ignore_certs, + global_args.use_progress(), + &list_args.repo_specifiers.user, + &list_args.repo_specifiers.group, + list_args.repo_specifiers.all_groups, + list_args.repo_specifiers.include_subgroups, + list_args.repo_specifiers.repo_type.into(), + ) + .await?; + } + }, + }, + Command::SelfUpdate => unreachable!(), + } + if let Some(msg) = update_msg { + info!("{msg}"); + } + Ok(()) + } } - if let Some(msg) = update_msg { - info!("{msg}"); - } - Ok(()) } /// Create a default ScanArgs instance for rule loading diff --git a/src/update.rs b/src/update.rs index 8f66c59..76629be 100644 --- a/src/update.rs +++ b/src/update.rs @@ -15,11 +15,7 @@ // `style_finding_active_heading` style so that they stand out alongside normal // scan output. -use std::{ - fs, - io::{ErrorKind, IsTerminal}, - path::PathBuf, -}; +use std::io::{ErrorKind, IsTerminal}; use self_update::{backends::github::Update, cargo_crate_version, errors::Error as UpdError}; use semver::Version; @@ -27,17 +23,6 @@ use tracing::{error, info, warn}; use crate::{cli::global::GlobalArgs, reporter::styles::Styles}; -/// Return `true` when the canonical executable path lives inside a Homebrew Cellar. -/// Works for Intel macOS (/usr/local/Cellar), Apple‑Silicon macOS (/opt/homebrew/Cellar) -/// and Linuxbrew (~/.linuxbrew/Cellar). 
-fn installed_via_homebrew() -> bool { - fn canonical_exe() -> Option { - std::env::current_exe().ok().and_then(|p| fs::canonicalize(p).ok()) - } - - canonical_exe().map(|p| p.components().any(|c| c.as_os_str() == "Cellar")).unwrap_or(false) -} - /// Check GitHub for a newer Kingfisher release and optionally self‑update. /// /// * `base_url` lets tests point at a mock server. @@ -51,16 +36,6 @@ pub fn check_for_update(global_args: &GlobalArgs, base_url: Option<&str>) -> Opt let use_color = std::io::stderr().is_terminal() && !global_args.quiet; let styles = Styles::new(use_color); - let is_brew = installed_via_homebrew(); - if is_brew { - info!( - "{}", - styles.style_finding_active_heading.apply_to( - "Homebrew install detected - will notify about updates but not self-update" - ) - ); - } - info!("{}", "Checking for updates…"); let mut builder = Update::configure(); @@ -145,7 +120,7 @@ pub fn check_for_update(global_args: &GlobalArgs, base_url: Option<&str>) -> Opt info!("{}", styles.style_finding_active_heading.apply_to(&plain)); // Attempt self‑update when allowed and feasible. 
- if global_args.self_update && !is_brew { + if global_args.self_update { match updater.update() { Ok(status) => info!( "{}", @@ -167,13 +142,6 @@ pub fn check_for_update(global_args: &GlobalArgs, base_url: Option<&str>) -> Opt _ => error!("Failed to update: {e}"), }, } - } else if is_brew { - info!( - "{}", - styles - .style_finding_active_heading - .apply_to("Run `brew upgrade kingfisher` to install the new version.") - ); } Some(plain) diff --git a/src/validation/jwt.rs b/src/validation/jwt.rs index 1f9b2e7..a3ee9c7 100644 --- a/src/validation/jwt.rs +++ b/src/validation/jwt.rs @@ -101,7 +101,6 @@ pub async fn validate_jwt_with(token: &str, opts: &ValidateOptions) -> Result<(b let header_val: serde_json::Value = serde_json::from_slice(&header_json).map_err(|e| anyhow!("invalid header json: {e}"))?; let alg_str = header_val.get("alg").and_then(|v| v.as_str()).unwrap_or(""); - // --- Policy: reject `alg: none` unless explicitly allowed ------------------ if alg_str.eq_ignore_ascii_case("none") { @@ -119,7 +118,7 @@ pub async fn validate_jwt_with(token: &str, opts: &ValidateOptions) -> Result<(b return Ok((false, "unsigned JWT (alg: none) not allowed".into())); } } - + // Safe to decode full header now that we know alg != none let header = decode_header(token).map_err(|e| anyhow!("decode header: {e}"))?; let alg = header.alg; From 96f17849539e2aebf3bba7034a5816544f6cd1ee Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Wed, 27 Aug 2025 15:43:31 -0700 Subject: [PATCH 12/12] changes in response to code review --- data/rules/cerebras.yml | 2 +- data/rules/fireworksai.yml | 2 +- data/rules/nvidia.yml | 1 - data/rules/weightsandbiases.yml | 6 ++---- 4 files changed, 4 insertions(+), 7 deletions(-) diff --git a/data/rules/cerebras.yml b/data/rules/cerebras.yml index af0f49c..73760a6 100644 --- a/data/rules/cerebras.yml +++ b/data/rules/cerebras.yml @@ -17,7 +17,7 @@ rules: method: GET url: "https://api.cerebras.ai/v1/models" headers: - Authorization: "Bearer {{TOKEN}}" + 
Authorization: "Bearer {{ TOKEN }}" response_matcher: - report_response: true - type: StatusMatch diff --git a/data/rules/fireworksai.yml b/data/rules/fireworksai.yml index 9ca431f..0933441 100644 --- a/data/rules/fireworksai.yml +++ b/data/rules/fireworksai.yml @@ -17,7 +17,7 @@ rules: method: GET url: "https://api.fireworks.ai/inference/v1/models" headers: - Authorization: "Bearer {{TOKEN}}" + Authorization: "Bearer {{ TOKEN }}" response_matcher: - report_response: true - type: StatusMatch diff --git a/data/rules/nvidia.yml b/data/rules/nvidia.yml index 25a90e1..1dc7b31 100644 --- a/data/rules/nvidia.yml +++ b/data/rules/nvidia.yml @@ -26,6 +26,5 @@ rules: - report_response: true - type: StatusMatch status: [200] - expected: ["application/json"] - type: WordMatch words: ["id", "versionId"] diff --git a/data/rules/weightsandbiases.yml b/data/rules/weightsandbiases.yml index 6661f53..bed9ca5 100644 --- a/data/rules/weightsandbiases.yml +++ b/data/rules/weightsandbiases.yml @@ -5,8 +5,6 @@ rules: (?xi) \b (?:wandb|weightsandbiases) - (?:.|[\n\r]){0,32}? - (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) (?:.|[\n\r]){0,16}? ( [a-f0-9]{40} @@ -28,10 +26,10 @@ rules: Authorization: "Basic {{ 'api:' | append: TOKEN | b64enc }}" Content-Type: "application/json" body: | - {"query":"query { viewer { id } }"} + {"query":"query { viewer { email username } }"} response_matcher: - report_response: true - type: JsonValid - type: WordMatch words: - - '"id"' + - '"username"'