Automatically extracts and scans SQLite database contents for secrets stored in table rows

This commit is contained in:
Mick Grove 2026-02-22 23:35:18 -07:00
commit 1f4ccb8144
8 changed files with 365 additions and 5 deletions

View file

@ -51,6 +51,7 @@ pub mod scanner;
pub mod scanner_pool;
pub mod slack;
pub mod snippet;
pub mod sqlite;
pub mod update;
pub mod util;
pub mod validation;

View file

@ -39,9 +39,10 @@ use crate::{
scanner::{
processing::BlobProcessor,
runner::{create_datastore_channel, spawn_datastore_writer_thread},
util::is_compressed_file,
util::{is_compressed_file, is_sqlite_file},
},
scanner_pool::ScannerPool,
sqlite::extract_sqlite_contents,
DirectoryResult, EnumeratorConfig, EnumeratorFileResult, FileResult, FilesystemEnumerator,
FoundInput, GitDiffConfig, GitRepoEnumerator, GitRepoResult, GitRepoWithMetadataEnumerator,
PathBuf,
@ -335,7 +336,32 @@ impl ParallelBlobIterator for FileResult {
let extraction_enabled = self.extract_archives;
let max_extraction_depth = self.extraction_depth;
if extraction_enabled && is_compressed_file(&self.path) {
if extraction_enabled && is_sqlite_file(&self.path) {
match extract_sqlite_contents(&self.path) {
Ok(tables) if tables.is_empty() => {
debug!("No tables found in SQLite database: {}", self.path.display());
Ok(None)
}
Ok(tables) => {
let items = tables
.into_iter()
.map(|(logical_name, data)| {
let full_path = self.path.join(logical_name);
let origin = OriginSet::new(Origin::from_file(full_path), vec![]);
(origin, Blob::from_bytes(data))
})
.collect();
Ok(Some(FileResultIter {
iter_kind: FileResultIterKind::Archive(items),
_marker: PhantomData,
}))
}
Err(e) => {
debug!("Failed to extract SQLite database {}: {e:#}", self.path.display());
Ok(None)
}
}
} else if extraction_enabled && is_compressed_file(&self.path) {
match decompress_file_to_temp(&self.path) {
Ok((content, _temp_dir)) => match content {
// Single-file decompression fully in memory.

View file

@ -30,3 +30,25 @@ pub fn is_compressed_file(path: &Path) -> bool {
false
}
}
/// File extensions (lowercase, without the leading dot) that mark a file as a
/// SQLite database for [`is_sqlite_file`].
const SQLITE_EXTENSIONS: &[&str] = &["db", "sqlite", "sqlite3", "db3", "s3db", "sl3"];

/// SQLite file header magic bytes. Useful for detecting extensionless SQLite
/// files (e.g. Chrome `Cookies`, `History`, `Web Data`).
#[allow(dead_code)]
pub const SQLITE_MAGIC: &[u8; 16] = b"SQLite format 3\0";

/// Returns `true` when the extension of `path` is one of [`SQLITE_EXTENSIONS`],
/// compared case-insensitively.
///
/// Only the path's extension is inspected; the file itself is never opened.
/// Paths without an extension, or whose extension is not valid UTF-8, yield
/// `false`.
pub fn is_sqlite_file(path: &Path) -> bool {
    match path.extension().and_then(|ext| ext.to_str()) {
        Some(ext) => SQLITE_EXTENSIONS
            .iter()
            .any(|known| known.eq_ignore_ascii_case(ext)),
        None => false,
    }
}
/// Check the first 16 bytes of `data` for the SQLite magic header.
#[allow(dead_code)]
pub fn has_sqlite_magic(data: &[u8]) -> bool {
data.len() >= SQLITE_MAGIC.len() && data[..SQLITE_MAGIC.len()] == *SQLITE_MAGIC
}

260
src/sqlite.rs Normal file
View file

@ -0,0 +1,260 @@
use std::fmt::Write as FmtWrite;
use std::path::Path;
use anyhow::{Context, Result};
use rusqlite::{Connection, OpenFlags};
use tracing::debug;
/// Upper bound on the number of rows `dump_table` emits for a single table
/// before it appends a truncation marker and stops.
const MAX_ROWS_PER_TABLE: usize = 100_000;
/// Byte budget (256 MiB) for the SQL text produced from one database.
/// `extract_sqlite_contents` shares it across all tables and stops dumping
/// further tables once it has been reached.
const MAX_TOTAL_BYTES: usize = 256 * 1024 * 1024;
/// Extract all user tables from a SQLite database as SQL text.
///
/// Returns a vec of `(logical_name, sql_text)` pairs, one per table.
/// Each entry contains the CREATE TABLE statement followed by INSERT
/// statements with explicit column names so that keyword-based secret
/// detectors can match column names like "api_key" near their values.
///
/// The logical name is `<table>.sql`, where `<table>` is the table name with
/// path separators, `:` and control characters replaced by `_`. Callers join
/// the logical name onto the database path to build an origin, and
/// `Path::join` lets an absolute or prefixed component replace the base path,
/// so an attacker-chosen table name must stay a single path component.
///
/// The database is opened read-only. Tables that fail to dump are logged at
/// debug level and skipped; extraction stops adding tables once
/// `MAX_TOTAL_BYTES` of SQL text has been produced.
///
/// # Errors
///
/// Returns an error if the database cannot be opened, the busy timeout cannot
/// be set, or the table list cannot be read.
pub fn extract_sqlite_contents(path: &Path) -> Result<Vec<(String, Vec<u8>)>> {
    let conn = Connection::open_with_flags(path, OpenFlags::SQLITE_OPEN_READ_ONLY)
        .with_context(|| format!("Failed to open SQLite database: {}", path.display()))?;
    conn.busy_timeout(std::time::Duration::from_secs(5))?;

    let tables = list_user_tables(&conn)?;
    if tables.is_empty() {
        debug!("SQLite database has no user tables: {}", path.display());
        return Ok(Vec::new());
    }

    let mut results = Vec::with_capacity(tables.len());
    let mut total_bytes: usize = 0;
    for (table_name, create_sql) in &tables {
        if total_bytes >= MAX_TOTAL_BYTES {
            debug!(
                "SQLite extraction hit total size limit ({MAX_TOTAL_BYTES} bytes), \
                 skipping remaining tables in {}",
                path.display()
            );
            break;
        }
        // `total_bytes < MAX_TOTAL_BYTES` holds here, so the subtraction cannot underflow.
        match dump_table(&conn, table_name, create_sql, MAX_TOTAL_BYTES - total_bytes) {
            Ok(sql_text) => {
                total_bytes += sql_text.len();
                let logical_name = format!("{}.sql", sanitize_logical_component(table_name));
                results.push((logical_name, sql_text.into_bytes()));
            }
            Err(e) => {
                debug!(
                    "Failed to dump table '{}' from {}: {e:#}",
                    table_name,
                    path.display()
                );
            }
        }
    }
    Ok(results)
}

/// Turn a table name into text that is safe to use as one path component:
/// `/`, `\`, `:` and control characters become `_`.
fn sanitize_logical_component(table_name: &str) -> String {
    table_name
        .chars()
        .map(|c| {
            if matches!(c, '/' | '\\' | ':') || c.is_control() {
                '_'
            } else {
                c
            }
        })
        .collect()
}
/// List all user tables (excluding `sqlite_*` internal tables) along with
/// their CREATE TABLE SQL, ordered by table name.
///
/// The underscore in the `sqlite_` prefix is escaped in the LIKE pattern.
/// Unescaped, `_` matches any single character, which would also drop user
/// tables such as `sqlitedata` from the scan.
///
/// A NULL `sql` column is returned as an empty string instead of failing the
/// whole listing, so one odd schema row does not hide every other table.
///
/// # Errors
///
/// Returns an error if the schema query cannot be prepared or a row cannot be
/// read.
fn list_user_tables(conn: &Connection) -> Result<Vec<(String, String)>> {
    let mut stmt = conn.prepare(
        "SELECT name, sql FROM sqlite_master \
         WHERE type = 'table' AND name NOT LIKE 'sqlite\\_%' ESCAPE '\\' \
         ORDER BY name",
    )?;
    let rows = stmt.query_map([], |row| {
        let name: String = row.get(0)?;
        let sql: Option<String> = row.get(1)?;
        Ok((name, sql.unwrap_or_default()))
    })?;

    let mut tables = Vec::new();
    for row in rows {
        tables.push(row?);
    }
    Ok(tables)
}
/// Dump a single table as SQL text: the CREATE statement followed by
/// INSERT INTO statements with named columns.
///
/// * `table_name` - table to dump; quoted as an identifier (embedded `"`
///   doubled) both in the query and in the emitted INSERT text.
/// * `create_sql` - the table's CREATE statement, written first.
/// * `remaining_budget` - soft cap on output size in bytes. It is checked
///   before each row, so the final row may push the output past it.
///
/// Rows are read with an explicit column list built from `column_names`
/// rather than `SELECT *`, so the value at position `i` always belongs to the
/// `i`-th emitted column name even when the two listings would disagree
/// (for example, tables with generated columns).
///
/// Output stops after `MAX_ROWS_PER_TABLE` rows or once the budget is reached;
/// in both cases a `-- (truncated ...)` comment line is appended.
///
/// # Errors
///
/// Returns an error if the column list cannot be read, the query cannot be
/// prepared, or a row or value cannot be fetched.
fn dump_table(
    conn: &Connection,
    table_name: &str,
    create_sql: &str,
    remaining_budget: usize,
) -> Result<String> {
    let mut out = String::with_capacity(4096);
    writeln!(out, "{create_sql};")?;

    let col_names = column_names(conn, table_name)?;
    if col_names.is_empty() {
        return Ok(out);
    }

    let columns_fragment = col_names
        .iter()
        .map(|c| format!("\"{}\"", c.replace('"', "\"\"")))
        .collect::<Vec<_>>()
        .join(",");
    // Quote once and reuse for both the query and the emitted INSERT text.
    let quoted_table = format!("\"{}\"", table_name.replace('"', "\"\""));

    let query = format!("SELECT {columns_fragment} FROM {quoted_table}");
    let mut stmt = conn.prepare(&query)?;
    let col_count = col_names.len();

    let mut rows_emitted: usize = 0;
    let mut rows = stmt.query([])?;
    while let Some(row) = rows.next()? {
        if rows_emitted >= MAX_ROWS_PER_TABLE {
            writeln!(out, "-- (truncated after {MAX_ROWS_PER_TABLE} rows)")?;
            break;
        }
        if out.len() >= remaining_budget {
            writeln!(out, "-- (truncated: size limit reached)")?;
            break;
        }

        write!(out, "INSERT INTO {quoted_table} ({columns_fragment}) VALUES (")?;
        for i in 0..col_count {
            if i > 0 {
                write!(out, ",")?;
            }
            write_value(&mut out, row, i)?;
        }
        writeln!(out, ");")?;
        rows_emitted += 1;
    }
    Ok(out)
}
/// Return the column names of `table_name`, in the order reported by
/// `PRAGMA table_info`.
///
/// The table name is embedded as a double-quoted identifier with any `"`
/// doubled.
///
/// # Errors
///
/// Returns an error if the pragma cannot be prepared or a row cannot be read.
fn column_names(conn: &Connection, table_name: &str) -> Result<Vec<String>> {
    let escaped_table = table_name.replace('"', "\"\"");
    let pragma = format!("PRAGMA table_info(\"{escaped_table}\")");

    let mut stmt = conn.prepare(&pragma)?;
    let mut rows = stmt.query([])?;

    let mut names = Vec::new();
    while let Some(row) = rows.next()? {
        // Result column 1 of `table_info` holds the column name.
        names.push(row.get::<_, String>(1)?);
    }
    Ok(names)
}
/// Append column `idx` of `row` to `out` as a SQL literal.
///
/// * NULL becomes `NULL`.
/// * Integers and reals use their default `Display` formatting.
/// * Text is decoded lossily as UTF-8 and single-quoted, with embedded `'`
///   doubled.
/// * Blobs become an `X'...'` literal of uppercase hex digits.
///
/// # Errors
///
/// Returns an error if the column cannot be read from the row or formatting
/// into `out` fails.
fn write_value(out: &mut String, row: &rusqlite::Row<'_>, idx: usize) -> Result<()> {
    use rusqlite::types::ValueRef;

    const HEX_UPPER: &[u8; 16] = b"0123456789ABCDEF";

    let value = row.get_ref(idx)?;
    match value {
        ValueRef::Null => out.push_str("NULL"),
        ValueRef::Integer(int) => write!(out, "{int}")?,
        ValueRef::Real(real) => write!(out, "{real}")?,
        ValueRef::Text(raw) => {
            out.push('\'');
            for ch in String::from_utf8_lossy(raw).chars() {
                if ch == '\'' {
                    // SQL escapes a single quote by doubling it.
                    out.push('\'');
                }
                out.push(ch);
            }
            out.push('\'');
        }
        ValueRef::Blob(bytes) => {
            out.push_str("X'");
            for &byte in bytes {
                out.push(char::from(HEX_UPPER[usize::from(byte >> 4)]));
                out.push(char::from(HEX_UPPER[usize::from(byte & 0x0F)]));
            }
            out.push('\'');
        }
    }
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::NamedTempFile;

    /// Create a temp-file-backed database and run `setup_sql` against it.
    ///
    /// The `NamedTempFile` handle is returned alongside the path so the file
    /// stays alive for as long as the caller holds it.
    fn db_from_sql(setup_sql: &str) -> (NamedTempFile, std::path::PathBuf) {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path().to_path_buf();
        let conn = Connection::open(&path).unwrap();
        conn.execute_batch(setup_sql).unwrap();
        (tmp, path)
    }

    /// Two-table fixture: `user_info` (with an `api_key` column) and `config`.
    fn create_test_db() -> (NamedTempFile, std::path::PathBuf) {
        db_from_sql(
            "CREATE TABLE user_info (id INTEGER PRIMARY KEY, username TEXT, api_key TEXT);\n\
             INSERT INTO user_info VALUES (1, 'alice', 'ghp_abc123def456ghi789jkl012mno345pqr678');\n\
             INSERT INTO user_info VALUES (2, 'bob', 'AKIAIOSFODNN7EXAMPLE');\n\
             CREATE TABLE config (key TEXT, value TEXT);\n\
             INSERT INTO config VALUES ('db_password', 's3cret!passw0rd');",
        )
    }

    #[test]
    fn extracts_all_tables() {
        let (_tmp, path) = create_test_db();
        let dumped = extract_sqlite_contents(&path).unwrap();

        assert_eq!(dumped.len(), 2);
        for expected in ["config.sql", "user_info.sql"] {
            assert!(dumped.iter().any(|(name, _)| name == expected));
        }
    }

    #[test]
    fn output_contains_column_names_and_values() {
        let (_tmp, path) = create_test_db();
        let dumped = extract_sqlite_contents(&path).unwrap();

        let (_, bytes) = dumped
            .iter()
            .find(|(name, _)| name == "user_info.sql")
            .unwrap();
        let sql = String::from_utf8_lossy(bytes);

        assert!(sql.contains("CREATE TABLE"));
        assert!(sql.contains("\"api_key\""));
        assert!(sql.contains("ghp_abc123def456ghi789jkl012mno345pqr678"));
        assert!(sql.contains("INSERT INTO"));
    }

    #[test]
    fn handles_empty_database() {
        let (_tmp, path) = db_from_sql("CREATE TABLE empty_table (id INTEGER);");
        let dumped = extract_sqlite_contents(&path).unwrap();

        assert_eq!(dumped.len(), 1);
        let sql = String::from_utf8_lossy(&dumped[0].1);
        assert!(sql.contains("CREATE TABLE"));
        assert!(!sql.contains("INSERT INTO"));
    }

    #[test]
    fn handles_nonexistent_file() {
        let outcome = extract_sqlite_contents(Path::new("/nonexistent/database.db"));
        assert!(outcome.is_err());
    }

    #[test]
    fn handles_special_characters_in_values() {
        let (_tmp, path) = db_from_sql(
            "CREATE TABLE t (id INTEGER PRIMARY KEY, val TEXT);\n\
             INSERT INTO t VALUES (1, 'it''s a test');\n\
             INSERT INTO t VALUES (2, NULL);",
        );
        let dumped = extract_sqlite_contents(&path).unwrap();

        let sql = String::from_utf8_lossy(&dumped[0].1);
        assert!(sql.contains("'it''s a test'"));
        assert!(sql.contains("NULL"));
    }
}