forked from mirrors/kingfisher
Automatically extracts and scans SQLite database contents for secrets stored in table rows
This commit is contained in:
parent
7845cfa727
commit
1f4ccb8144
8 changed files with 365 additions and 5 deletions
|
|
@ -3,6 +3,7 @@
|
|||
All notable changes to this project will be documented in this file.
|
||||
|
||||
## [v1.85.0]
|
||||
- SQLite database scanning: kingfisher now detects and extracts SQLite files (`.db`, `.sqlite`, `.sqlite3`, etc.), dumping each table as SQL text with named columns so secrets stored in database rows are scannable. Controlled by the existing `--extract-archives` flag.
|
||||
- Performance: pipelined ODB enumeration — scanning now begins while blob OIDs are still being discovered, overlapping I/O with pattern matching.
|
||||
- Performance: skip blobs smaller than 20 bytes during enumeration (too small to contain any secret).
|
||||
- Performance: preserve pack-ascending blob order in the metadata path for better I/O locality when Rayon splits work.
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ http = "1.4"
|
|||
|
||||
[package]
|
||||
name = "kingfisher"
|
||||
version = "1.84.0"
|
||||
version = "1.85.0"
|
||||
description = "MongoDB's blazingly fast and accurate secret scanning and validation tool"
|
||||
edition.workspace = true
|
||||
rust-version.workspace = true
|
||||
|
|
@ -198,6 +198,7 @@ zip = { version = "2.4.2", default-features = false, features = ["deflate", "def
|
|||
tar = "0.4.44"
|
||||
lzma-rs = "0.3.0"
|
||||
asar = "0.3.0"
|
||||
rusqlite = { version = "0.34", features = ["bundled"] }
|
||||
blake3 = "1.8.2"
|
||||
memchr = "2.7"
|
||||
memmap2 = "0.9.9"
|
||||
|
|
|
|||
|
|
@ -39,6 +39,7 @@ Designed for offensive security engineers and blue-teamers alike, Kingfisher hel
|
|||
- **Blast Radius Mapping**: instantly map leaked keys to their effective cloud identities and exposed resources with `--access-map`. Supports AWS, GCP, Azure, GitHub, and GitLab, with support for more token types coming.
|
||||
- **Broad AI SaaS coverage**: finds and validates tokens for OpenAI, Anthropic, Google Gemini, Cohere, AWS Bedrock, Voyage AI, Mistral, Stability AI, Replicate, xAI (Grok), Ollama, Langchain, Perplexity, Weights & Biases, Cerebras, Friendli, Fireworks.ai, NVIDIA NIM, Together.ai, Zhipu, and many more
|
||||
- **Compressed Files**: Supports extracting and scanning compressed files for secrets
|
||||
- **SQLite Database Scanning**: Automatically extracts and scans SQLite database contents for secrets stored in table rows
|
||||
- **Baseline management**: generate and track baselines to suppress known secrets ([docs/BASELINE.md](/docs/BASELINE.md))
|
||||
- **Checksum-aware detection**: verifies tokens with built-in checksums (e.g., GitHub, Confluent, Zuplo) — no API calls required
|
||||
- **Built-in Report Viewer**: Visualize and triage findings locally with `kingfisher view ./report-file.json`
|
||||
|
|
@ -600,7 +601,7 @@ kingfisher scan /tmp/repo --branch feature-1 \
|
|||
|
||||
# Lineage and Evolution
|
||||
|
||||
Kingfisher began as an internal fork of Nosey Parker, used as a high-performance foundation for secret detection.
|
||||
Kingfisher began as an internal fork of [Nosey Parker](https://github.com/praetorian-inc/noseyparker), used as a high-performance foundation for secret detection.
|
||||
|
||||
Since then it has evolved far beyond that starting point, introducing live validation, hundreds of new rules, additional scan targets, and major architectural changes across nearly every subsystem.
|
||||
|
||||
|
|
@ -610,7 +611,7 @@ Since then it has evolved far beyond that starting point, introducing live valid
|
|||
- **Baseline management** to suppress known findings over time
|
||||
- **Tree-sitter parsing** layered on Hyperscan for language-aware detection
|
||||
- **More scan targets** (GitLab, Bitbucket, Gitea, Jira, Confluence, Slack, S3, GCS, Docker, Hugging Face, etc.)
|
||||
- **Compressed Files** scanning support added
|
||||
- **Compressed Files** and **SQLite database** scanning support
|
||||
- **New storage model** (in-memory + Bloom filter, replacing SQLite)
|
||||
- **Unified workflow** with JSON/BSON/SARIF outputs
|
||||
- **Cross-platform builds** for Linux, macOS, and Windows
|
||||
|
|
|
|||
|
|
@ -51,6 +51,7 @@ pub mod scanner;
|
|||
pub mod scanner_pool;
|
||||
pub mod slack;
|
||||
pub mod snippet;
|
||||
pub mod sqlite;
|
||||
pub mod update;
|
||||
pub mod util;
|
||||
pub mod validation;
|
||||
|
|
|
|||
|
|
@ -39,9 +39,10 @@ use crate::{
|
|||
scanner::{
|
||||
processing::BlobProcessor,
|
||||
runner::{create_datastore_channel, spawn_datastore_writer_thread},
|
||||
util::is_compressed_file,
|
||||
util::{is_compressed_file, is_sqlite_file},
|
||||
},
|
||||
scanner_pool::ScannerPool,
|
||||
sqlite::extract_sqlite_contents,
|
||||
DirectoryResult, EnumeratorConfig, EnumeratorFileResult, FileResult, FilesystemEnumerator,
|
||||
FoundInput, GitDiffConfig, GitRepoEnumerator, GitRepoResult, GitRepoWithMetadataEnumerator,
|
||||
PathBuf,
|
||||
|
|
@ -335,7 +336,32 @@ impl ParallelBlobIterator for FileResult {
|
|||
let extraction_enabled = self.extract_archives;
|
||||
let max_extraction_depth = self.extraction_depth;
|
||||
|
||||
if extraction_enabled && is_compressed_file(&self.path) {
|
||||
if extraction_enabled && is_sqlite_file(&self.path) {
|
||||
match extract_sqlite_contents(&self.path) {
|
||||
Ok(tables) if tables.is_empty() => {
|
||||
debug!("No tables found in SQLite database: {}", self.path.display());
|
||||
Ok(None)
|
||||
}
|
||||
Ok(tables) => {
|
||||
let items = tables
|
||||
.into_iter()
|
||||
.map(|(logical_name, data)| {
|
||||
let full_path = self.path.join(logical_name);
|
||||
let origin = OriginSet::new(Origin::from_file(full_path), vec![]);
|
||||
(origin, Blob::from_bytes(data))
|
||||
})
|
||||
.collect();
|
||||
Ok(Some(FileResultIter {
|
||||
iter_kind: FileResultIterKind::Archive(items),
|
||||
_marker: PhantomData,
|
||||
}))
|
||||
}
|
||||
Err(e) => {
|
||||
debug!("Failed to extract SQLite database {}: {e:#}", self.path.display());
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
} else if extraction_enabled && is_compressed_file(&self.path) {
|
||||
match decompress_file_to_temp(&self.path) {
|
||||
Ok((content, _temp_dir)) => match content {
|
||||
// Single-file decompression fully in memory.
|
||||
|
|
|
|||
|
|
@ -30,3 +30,25 @@ pub fn is_compressed_file(path: &Path) -> bool {
|
|||
false
|
||||
}
|
||||
}
|
||||
|
||||
const SQLITE_EXTENSIONS: &[&str] = &["db", "sqlite", "sqlite3", "db3", "s3db", "sl3"];
|
||||
/// SQLite file header magic bytes. Useful for detecting extensionless SQLite
|
||||
/// files (e.g. Chrome `Cookies`, `History`, `Web Data`).
|
||||
#[allow(dead_code)]
|
||||
pub const SQLITE_MAGIC: &[u8; 16] = b"SQLite format 3\0";
|
||||
|
||||
pub fn is_sqlite_file(path: &Path) -> bool {
|
||||
if let Some(ext) = path.extension().and_then(|s| s.to_str()) {
|
||||
let ext_lower = ext.to_lowercase();
|
||||
if SQLITE_EXTENSIONS.iter().any(|e| *e == ext_lower) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Check the first 16 bytes of `data` for the SQLite magic header.
|
||||
#[allow(dead_code)]
|
||||
pub fn has_sqlite_magic(data: &[u8]) -> bool {
|
||||
data.len() >= SQLITE_MAGIC.len() && data[..SQLITE_MAGIC.len()] == *SQLITE_MAGIC
|
||||
}
|
||||
|
|
|
|||
260
src/sqlite.rs
Normal file
260
src/sqlite.rs
Normal file
|
|
@ -0,0 +1,260 @@
|
|||
use std::fmt::Write as FmtWrite;
|
||||
use std::path::Path;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use rusqlite::{Connection, OpenFlags};
|
||||
use tracing::debug;
|
||||
|
||||
const MAX_ROWS_PER_TABLE: usize = 100_000;
|
||||
const MAX_TOTAL_BYTES: usize = 256 * 1024 * 1024;
|
||||
|
||||
/// Extract all user tables from a SQLite database as SQL text.
|
||||
///
|
||||
/// Returns a vec of `(logical_name, sql_text)` pairs, one per table.
|
||||
/// Each entry contains the CREATE TABLE statement followed by INSERT
|
||||
/// statements with explicit column names so that keyword-based secret
|
||||
/// detectors can match column names like "api_key" near their values.
|
||||
pub fn extract_sqlite_contents(path: &Path) -> Result<Vec<(String, Vec<u8>)>> {
|
||||
let conn = Connection::open_with_flags(path, OpenFlags::SQLITE_OPEN_READ_ONLY)
|
||||
.with_context(|| format!("Failed to open SQLite database: {}", path.display()))?;
|
||||
|
||||
conn.busy_timeout(std::time::Duration::from_secs(5))?;
|
||||
|
||||
let tables = list_user_tables(&conn)?;
|
||||
if tables.is_empty() {
|
||||
debug!("SQLite database has no user tables: {}", path.display());
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let mut results = Vec::with_capacity(tables.len());
|
||||
let mut total_bytes: usize = 0;
|
||||
|
||||
for (table_name, create_sql) in &tables {
|
||||
if total_bytes >= MAX_TOTAL_BYTES {
|
||||
debug!(
|
||||
"SQLite extraction hit total size limit ({MAX_TOTAL_BYTES} bytes), \
|
||||
skipping remaining tables in {}",
|
||||
path.display()
|
||||
);
|
||||
break;
|
||||
}
|
||||
|
||||
match dump_table(&conn, table_name, create_sql, MAX_TOTAL_BYTES - total_bytes) {
|
||||
Ok(sql_text) => {
|
||||
total_bytes += sql_text.len();
|
||||
let logical_name = format!("{}.sql", table_name);
|
||||
results.push((logical_name, sql_text.into_bytes()));
|
||||
}
|
||||
Err(e) => {
|
||||
debug!(
|
||||
"Failed to dump table '{}' from {}: {e:#}",
|
||||
table_name,
|
||||
path.display()
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
/// List all user tables (excluding sqlite_* internal tables) along with
|
||||
/// their CREATE TABLE SQL.
|
||||
fn list_user_tables(conn: &Connection) -> Result<Vec<(String, String)>> {
|
||||
let mut stmt = conn.prepare(
|
||||
"SELECT name, sql FROM sqlite_master \
|
||||
WHERE type = 'table' AND name NOT LIKE 'sqlite_%' \
|
||||
ORDER BY name",
|
||||
)?;
|
||||
|
||||
let rows = stmt.query_map([], |row| {
|
||||
let name: String = row.get(0)?;
|
||||
let sql: String = row.get(1)?;
|
||||
Ok((name, sql))
|
||||
})?;
|
||||
|
||||
let mut tables = Vec::new();
|
||||
for row in rows {
|
||||
tables.push(row?);
|
||||
}
|
||||
Ok(tables)
|
||||
}
|
||||
|
||||
/// Dump a single table as SQL text: the CREATE statement followed by
|
||||
/// INSERT INTO statements with named columns.
|
||||
fn dump_table(
|
||||
conn: &Connection,
|
||||
table_name: &str,
|
||||
create_sql: &str,
|
||||
remaining_budget: usize,
|
||||
) -> Result<String> {
|
||||
let mut out = String::with_capacity(4096);
|
||||
writeln!(out, "{create_sql};")?;
|
||||
|
||||
let col_names = column_names(conn, table_name)?;
|
||||
if col_names.is_empty() {
|
||||
return Ok(out);
|
||||
}
|
||||
|
||||
let columns_fragment = col_names
|
||||
.iter()
|
||||
.map(|c| format!("\"{}\"", c.replace('"', "\"\"")))
|
||||
.collect::<Vec<_>>()
|
||||
.join(",");
|
||||
|
||||
let query = format!("SELECT * FROM \"{}\"", table_name.replace('"', "\"\""));
|
||||
let mut stmt = conn.prepare(&query)?;
|
||||
let col_count = col_names.len();
|
||||
|
||||
let mut rows_emitted: usize = 0;
|
||||
let mut rows = stmt.query([])?;
|
||||
|
||||
while let Some(row) = rows.next()? {
|
||||
if rows_emitted >= MAX_ROWS_PER_TABLE {
|
||||
writeln!(out, "-- (truncated after {MAX_ROWS_PER_TABLE} rows)")?;
|
||||
break;
|
||||
}
|
||||
if out.len() >= remaining_budget {
|
||||
writeln!(out, "-- (truncated: size limit reached)")?;
|
||||
break;
|
||||
}
|
||||
|
||||
write!(out, "INSERT INTO \"{table_name}\" ({columns_fragment}) VALUES (")?;
|
||||
|
||||
for i in 0..col_count {
|
||||
if i > 0 {
|
||||
write!(out, ",")?;
|
||||
}
|
||||
write_value(&mut out, row, i)?;
|
||||
}
|
||||
|
||||
writeln!(out, ");")?;
|
||||
rows_emitted += 1;
|
||||
}
|
||||
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
/// Fetch the column names of `table_name` via `PRAGMA table_info`.
///
/// Names are returned in the order the pragma reports them. Embedded `"` in
/// the table name are doubled before it is placed in the quoted identifier.
fn column_names(conn: &Connection, table_name: &str) -> Result<Vec<String>> {
    let escaped = table_name.replace('"', "\"\"");
    let mut pragma = conn.prepare(&format!("PRAGMA table_info(\"{escaped}\")"))?;

    let mut found = Vec::new();
    let mut cursor = pragma.query([])?;
    while let Some(record) = cursor.next()? {
        // Column 1 of each `table_info` row holds the column name.
        found.push(record.get::<_, String>(1)?);
    }
    Ok(found)
}
|
||||
|
||||
fn write_value(out: &mut String, row: &rusqlite::Row<'_>, idx: usize) -> Result<()> {
|
||||
use rusqlite::types::ValueRef;
|
||||
match row.get_ref(idx)? {
|
||||
ValueRef::Null => write!(out, "NULL")?,
|
||||
ValueRef::Integer(i) => write!(out, "{i}")?,
|
||||
ValueRef::Real(f) => write!(out, "{f}")?,
|
||||
ValueRef::Text(t) => {
|
||||
let s = String::from_utf8_lossy(t);
|
||||
write!(out, "'{}'", s.replace('\'', "''"))?;
|
||||
}
|
||||
ValueRef::Blob(b) => {
|
||||
write!(out, "X'")?;
|
||||
for byte in b {
|
||||
write!(out, "{byte:02X}")?;
|
||||
}
|
||||
write!(out, "'")?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::NamedTempFile;

    /// Build a throwaway database holding two populated tables.
    ///
    /// The temp-file handle is returned alongside the path so the file stays
    /// alive for as long as the caller holds it.
    fn create_test_db() -> (NamedTempFile, std::path::PathBuf) {
        let handle = NamedTempFile::new().unwrap();
        let db_path = handle.path().to_path_buf();
        let db = Connection::open(&db_path).unwrap();
        db.execute_batch(
            "CREATE TABLE user_info (id INTEGER PRIMARY KEY, username TEXT, api_key TEXT);
             INSERT INTO user_info VALUES (1, 'alice', 'ghp_abc123def456ghi789jkl012mno345pqr678');
             INSERT INTO user_info VALUES (2, 'bob', 'AKIAIOSFODNN7EXAMPLE');
             CREATE TABLE config (key TEXT, value TEXT);
             INSERT INTO config VALUES ('db_password', 's3cret!passw0rd');",
        )
        .unwrap();
        (handle, db_path)
    }

    /// Both tables come back, each under its `<table>.sql` logical name.
    #[test]
    fn extracts_all_tables() {
        let (_guard, db_path) = create_test_db();
        let dumped = extract_sqlite_contents(&db_path).unwrap();
        assert_eq!(dumped.len(), 2);

        for expected in ["config.sql", "user_info.sql"] {
            assert!(dumped.iter().any(|(name, _)| name == expected));
        }
    }

    /// The dump carries the schema, quoted column names, and row values.
    #[test]
    fn output_contains_column_names_and_values() {
        let (_guard, db_path) = create_test_db();
        let dumped = extract_sqlite_contents(&db_path).unwrap();

        let (_, bytes) = dumped
            .iter()
            .find(|(name, _)| name == "user_info.sql")
            .unwrap();
        let sql = String::from_utf8_lossy(bytes);

        for needle in [
            "CREATE TABLE",
            "\"api_key\"",
            "ghp_abc123def456ghi789jkl012mno345pqr678",
            "INSERT INTO",
        ] {
            assert!(sql.contains(needle));
        }
    }

    /// A table without rows yields its CREATE statement and no INSERTs.
    #[test]
    fn handles_empty_database() {
        let handle = NamedTempFile::new().unwrap();
        let db_path = handle.path().to_path_buf();
        let db = Connection::open(&db_path).unwrap();
        db.execute_batch("CREATE TABLE empty_table (id INTEGER);")
            .unwrap();

        let dumped = extract_sqlite_contents(&db_path).unwrap();
        assert_eq!(dumped.len(), 1);

        let sql = String::from_utf8_lossy(&dumped[0].1);
        assert!(sql.contains("CREATE TABLE"));
        assert!(!sql.contains("INSERT INTO"));
    }

    /// Opening a path that does not exist is reported as an error.
    #[test]
    fn handles_nonexistent_file() {
        let outcome = extract_sqlite_contents(Path::new("/nonexistent/database.db"));
        assert!(outcome.is_err());
    }

    /// Single quotes are doubled and NULL is emitted as a bare keyword.
    #[test]
    fn handles_special_characters_in_values() {
        let handle = NamedTempFile::new().unwrap();
        let db_path = handle.path().to_path_buf();
        let db = Connection::open(&db_path).unwrap();
        db.execute_batch(
            "CREATE TABLE t (id INTEGER PRIMARY KEY, val TEXT);
             INSERT INTO t VALUES (1, 'it''s a test');
             INSERT INTO t VALUES (2, NULL);",
        )
        .unwrap();

        let dumped = extract_sqlite_contents(&db_path).unwrap();
        let sql = String::from_utf8_lossy(&dumped[0].1);
        assert!(sql.contains("'it''s a test'"));
        assert!(sql.contains("NULL"));
    }
}
|
||||
48
tests/smoke_sqlite.rs
Normal file
48
tests/smoke_sqlite.rs
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
use assert_cmd::prelude::*;
|
||||
|
||||
/// End-to-end check: scanning a SQLite file reports a secret stored in a row
/// and attributes it to the per-table logical file.
#[test]
fn smoke_scan_sqlite_database() -> anyhow::Result<()> {
    use rusqlite::Connection;
    use std::process::Command;

    let dir = tempfile::tempdir()?;
    let db_path = dir.path().join("secrets.db");
    let github_pat = "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";

    // Populate the database in its own scope so the connection is closed
    // before the scanner runs.
    {
        let setup_sql = format!(
            "CREATE TABLE user_info (id INTEGER PRIMARY KEY, username TEXT, api_key TEXT);
             INSERT INTO user_info VALUES (1, 'alice', '{github_pat}');"
        );
        let conn = Connection::open(&db_path)?;
        conn.execute_batch(&setup_sql)?;
    }

    // Exit status the run is asserted to finish with.
    let findings_code = 200;

    // With extraction enabled, the secret should be found and the path should
    // reference the table-level logical file (secrets.db/user_info.sql).
    let mut scan = Command::new(assert_cmd::cargo::cargo_bin!("kingfisher"));
    scan.args([
        "scan",
        db_path.to_str().unwrap(),
        "--confidence=low",
        "--format",
        "json",
        "--no-update-check",
    ]);

    let checked = scan
        .assert()
        .code(findings_code)
        .stdout(predicates::str::contains(github_pat));

    let stdout = String::from_utf8_lossy(&checked.get_output().stdout);
    assert!(
        stdout.contains("user_info.sql"),
        "Expected table-level path in finding, got: {stdout}"
    );

    dir.close()?;
    Ok(())
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue