diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 27e1f98..beeec7f 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -5,6 +5,9 @@ on:
      branches:
        - main
 
+env:
+  RUST_TOOLCHAIN: "1.90"
+
 # This workflow runs on pull requests to the main branch
 # It builds the project for 2 platforms, Linux arm64 and macOS arm64,
 # and runs tests for each platform. All platforms tested on merge to main
@@ -16,7 +19,7 @@ jobs:
        - uses: actions/checkout@v4
        - uses: actions-rs/toolchain@v1
          with:
-           toolchain: 1.88.0
+           toolchain: ${{ env.RUST_TOOLCHAIN }}
            profile: minimal
            override: true
        - uses: swatinem/rust-cache@v2
@@ -34,7 +37,7 @@ jobs:
        - uses: actions/checkout@v4
        - uses: actions-rs/toolchain@v1
          with:
-           toolchain: 1.88.0
+           toolchain: ${{ env.RUST_TOOLCHAIN }}
            profile: minimal
            override: true
        - uses: swatinem/rust-cache@v2
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index abe38f9..4debe1d 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -4,6 +4,8 @@ on:
   push:
     branches:
       - main
+env:
+  RUST_TOOLCHAIN: "1.90"
 
 jobs:
   # ──────────────── Linux (via Makefile) ────────────────
@@ -15,7 +17,7 @@ jobs:
 
       - uses: actions-rs/toolchain@v1
         with:
-          toolchain: 1.88.0
+          toolchain: ${{ env.RUST_TOOLCHAIN }}
           profile: minimal
           override: true
 
@@ -69,7 +71,7 @@ jobs:
 
       - uses: actions-rs/toolchain@v1
         with:
-          toolchain: 1.88.0
+          toolchain: ${{ env.RUST_TOOLCHAIN }}
           profile: minimal
           override: true
 
@@ -123,7 +125,7 @@ jobs:
 
       - uses: actions-rs/toolchain@v1
         with:
-          toolchain: 1.88.0
+          toolchain: ${{ env.RUST_TOOLCHAIN }}
           profile: minimal
           override: true
 
@@ -154,7 +156,7 @@ jobs:
 
       - uses: actions-rs/toolchain@v1
         with:
-          toolchain: 1.88.0
+          toolchain: ${{ env.RUST_TOOLCHAIN }}
           profile: minimal
           override: true
 
@@ -185,7 +187,7 @@ jobs:
       - uses: actions/checkout@v4
       - uses: actions-rs/toolchain@v1
         with:
-          toolchain: 1.88.0
+          toolchain: ${{ env.RUST_TOOLCHAIN }}
           profile: minimal
           override: true
 
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1be8150..0f7df0d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,12 @@
 
 All notable changes to this project will be documented in this file.
 
+## [v1.58.0]
+- Added first-class Hugging Face scanning support, including CLI enumeration, token authentication, and integration with remote scans.
+- Condensed GitError formatting to report the exit status and the first informative lines from stdout/stderr, producing concise git clone failure logs.
+- Added support for scanning Google Cloud Storage buckets via `--gcs-bucket`, including optional prefixes and service-account authentication.
+- Added `--skip-aws-account` (now accepting comma-separated values) and `--skip-aws-account-file` to bypass live AWS validation for known canary/honey-token account IDs without triggering alerts. Kingfisher now ships with several canary AWS account IDs pre-seeded in the skip list and now reports matching findings as "Not Attempted" with the "Response" containing "(skip list entry)" so it's clear that validation was intentionally skipped and why.
+  
 ## [v1.57.0]
 - Added inline ignore directive detection to treat suppression tokens anywhere on surrounding lines, including multi-line handling
 - Added a `--no-ignore` CLI flag to disable inline directives when you need every potential secret reported
diff --git a/Cargo.toml b/Cargo.toml
index 852194d..1eb11b5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [workspace.package]
 edition = "2021"
-rust-version = "1.88"
+rust-version = "1.90"
 license = "Apache-2.0"
 authors = ["Mick Grove <mick.grove@mongodb.com>"]
 homepage = "https://github.com/mongodb/kingfisher"
@@ -10,7 +10,7 @@ publish = false
 
 [package]
 name = "kingfisher"
-version = "1.57.0"
+version = "1.58.0"
 description = "MongoDB's blazingly fast and accurate secret scanning and validation tool"
 edition.workspace = true
 rust-version.workspace = true
@@ -122,7 +122,6 @@ reqwest-middleware = "0.4.2"
 tracing-subscriber = {version = "0.3.19", features = ["env-filter"] }
 tracing-core = "0.1.34"
 tree-sitter = "0.25.8"
-aws-smithy-runtime = "1.9.1"
 aws-smithy-http-client = "1.1.1"
 aws-smithy-runtime-api = "1.9.0"
 aws-smithy-types = "1.3.2"
@@ -192,6 +191,10 @@ walkdir = "2.5.0"
 p256 = "0.13.2"
 ed25519-dalek = { version = "2.2", features = ["pkcs8"] }
 aws-sdk-s3 = "1.100.0"
+gcloud-storage = { version = "1.1.1", default-features = false, features = [
+    "rustls-tls",
+    "auth",
+] }
 tokei = "12.1.2"
 
 [target.'cfg(not(windows))'.dependencies]
diff --git a/Makefile b/Makefile
index 14a4c6a..0325d64 100644
--- a/Makefile
+++ b/Makefile
@@ -110,11 +110,11 @@ setup-zig:
 ubuntu-x64: setup-zig   # ensures Zig & cargo-zigbuild exist
 	@echo "Checking Rust toolchain…"
 	@$(MAKE) check-rust || { \
-	    echo "🦀  Installing Rust 1.88.0 …"; \
+            echo "🦀  Installing Rust 1.90.0 …"; \
 	    curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y; \
 	    . $$HOME/.cargo/env; \
-	    rustup toolchain install 1.88.0; \
-	    rustup default 1.88.0; \
+            rustup toolchain install 1.90.0; \
+            rustup default 1.90.0; \
 	}
 
 	@echo "📦  Installing build dependencies (musl, cmake, etc.)…"
@@ -150,11 +150,11 @@ ubuntu-x64: setup-zig   # ensures Zig & cargo-zigbuild exist
 ubuntu-arm64: setup-zig   # ensures Zig & cargo-zigbuild exist
 	@echo "Checking Rust toolchain…"
 	@$(MAKE) check-rust || { \
-	    echo "🦀  Installing Rust 1.88.0 …"; \
+            echo "🦀  Installing Rust 1.90.0 …"; \
 	    curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y; \
 	    . $$HOME/.cargo/env; \
-	    rustup toolchain install 1.88.0; \
-	    rustup default 1.88.0; \
+            rustup toolchain install 1.90.0; \
+            rustup default 1.90.0; \
 	}
 
 	@echo "📦  Installing build dependencies (musl, cmake, etc.)…"
@@ -245,7 +245,7 @@ endif
 linux-x64: check-docker create-dockerignore
 	@mkdir -p target/release
 	docker run --platform linux/amd64 --rm \
-	  -v "$$(pwd):/src" -w /src rust:1.88-alpine sh -eu -c '\
+          -v "$$(pwd):/src" -w /src rust:1.90-alpine sh -eu -c '\
 		apk add --no-cache \
 		    musl-dev \
 		    gcc g++ make cmake pkgconfig \
@@ -256,7 +256,7 @@ linux-x64: check-docker create-dockerignore
 		    patch perl ragel && \
 	        git openssl-dev curl && \
 		\
-		cargo test --workspace --all-targets --release ; \
+		cargo test --workspace --all-targets ; \
 		\
 		rustup target add x86_64-unknown-linux-musl && \
 		\
@@ -274,7 +274,7 @@ linux-x64: check-docker create-dockerignore
 linux-arm64: check-docker create-dockerignore
 	@mkdir -p target/release
 	docker run --platform linux/arm64 --rm \
-	  -v "$$(pwd):/src" -w /src rust:1.88-alpine sh -eu -c '\
+          -v "$$(pwd):/src" -w /src rust:1.90-alpine sh -eu -c '\
 		apk add --no-cache \
 		    musl-dev \
 		    gcc g++ make cmake pkgconfig \
@@ -287,7 +287,7 @@ linux-arm64: check-docker create-dockerignore
 		\
 		rustup target add aarch64-unknown-linux-musl && \
 		\
-		cargo test --workspace --all-targets --release ; \
+		cargo test --workspace --all-targets ; \
 		\
 		export PKG_CONFIG_ALLOW_CROSS=1 ; \
 		export RUSTFLAGS="-C target-feature=+crt-static" ; \
@@ -385,7 +385,7 @@ check-rust:
 	  echo "Rust not found."; \
 	  exit 1; \
 	fi; \
-	required=1.88.0; \
+        required=1.90.0; \
 	if [ $$(printf '%s\n' "$$required" "$$version" | sort -V | head -n1) != "$$required" ]; then \
 	  echo "Rust version $$version is older than required $$required."; \
 	  exit 1; \
diff --git a/README.md b/README.md
index d77d01e..bd07bc7 100644
--- a/README.md
+++ b/README.md
@@ -5,22 +5,23 @@
 
 [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
 
-Kingfisher is a blazingly fast secret‑scanning and live validation tool built in Rust. It combines Intel’s hardware‑accelerated Hyperscan regex engine with language‑aware source code parsing, and **ships with hundreds of built‑in rules** to detect, validate, and triage secrets before they ever reach production
+Kingfisher is a blazingly fast secret‑scanning and live validation tool built in Rust. It combines Intel’s SIMD‑accelerated regex engine (Hyperscan) with language‑aware source code parsing, and **ships with hundreds of built‑in rules** to detect, validate, and triage secrets before they ever reach production.
 </p>
 
-Originally forked from Praetorian’s Nosey Parker, Kingfisher **adds** live cloud-API validation; many more targets (GitLab, BitBucket, Gitea, S3, Docker, Jira, Confluence, Slack); compressed-file extraction and scanning; baseline and allowlist controls; language-aware detection (~20 languages); and a native Windows binary. See [Origins and Divergence](#origins-and-divergence) for details.
+Originally forked from Praetorian’s Nosey Parker, Kingfisher has since significantly expanded and diverged, adding live validation, 10+ new scan targets, and major architectural enhancements. See [Origins and Divergence](#origins-and-divergence) for details.
+
 ## Key Features
 
 ### Multiple Scan Targets
 <div align="center">
 
-| Files / Dirs | Local Git | GitHub | GitLab | Azure DevOps | Bitbucket | Gitea |
-|:-------------:|:----------:|:------:|:------:|:-------------:|:----------:|:------:|
-| <img src="./docs/assets/icons/files.svg" height="40" alt="Files / Dirs"/><br/><sub>Files / Dirs</sub> | <img src="./docs/assets/icons/local-git.svg" height="40" alt="Local Git"/><br/><sub>Local Git</sub> | <img src="./docs/assets/icons/github.svg" height="40" alt="GitHub"/><br/><sub>GitHub</sub> | <img src="./docs/assets/icons/gitlab.svg" height="40" alt="GitLab"/><br/><sub>GitLab</sub> | <img src="./docs/assets/icons/azure-devops.svg" height="40" alt="Azure DevOps"/><br/><sub>Azure DevOps</sub> | <img src="./docs/assets/icons/bitbucket.svg" height="40" alt="Bitbucket"/><br/><sub>Bitbucket</sub> | <img src="./docs/assets/icons/gitea.svg" height="40" alt="Gitea"/><br/><sub>Gitea</sub> |
+| Files / Dirs | Local Git | GitHub | GitLab | Azure Repos | Bitbucket | Gitea | Hugging Face |
+|:-------------:|:----------:|:------:|:------:|:-------------:|:----------:|:------:|:-------------:|
+| <img src="./docs/assets/icons/files.svg" height="40" alt="Files / Dirs"/><br/><sub>Files / Dirs</sub> | <img src="./docs/assets/icons/local-git.svg" height="40" alt="Local Git"/><br/><sub>Local Git</sub> | <img src="./docs/assets/icons/github.svg" height="40" alt="GitHub"/><br/><sub>GitHub</sub> | <img src="./docs/assets/icons/gitlab.svg" height="40" alt="GitLab"/><br/><sub>GitLab</sub> | <img src="./docs/assets/icons/azure-devops.svg" height="40" alt="Azure Repos"/><br/><sub>Azure Repos</sub> | <img src="./docs/assets/icons/bitbucket.svg" height="40" alt="Bitbucket"/><br/><sub>Bitbucket</sub> | <img src="./docs/assets/icons/gitea.svg" height="40" alt="Gitea"/><br/><sub>Gitea</sub> |<img src="./docs/assets/icons/huggingface.svg" height="40" width="40" alt="Hugging Face"/><br/><sub>Hugging Face</sub> |
 
-| Docker | Jira | Confluence | Slack | AWS S3 |
-|:------:|:----:|:-----------:|:-----:|:------:|
-| <img src="./docs/assets/icons/docker.svg" height="40" alt="Docker"/><br/><sub>Docker</sub> | <img src="./docs/assets/icons/jira.svg" height="40" alt="Jira"/><br/><sub>Jira</sub> | <img src="./docs/assets/icons/confluence.svg" height="40" alt="Confluence"/><br/><sub>Confluence</sub> | <img src="./docs/assets/icons/slack.svg" height="40" alt="Slack"/><br/><sub>Slack</sub> | <img src="./docs/assets/icons/aws-s3.svg" height="40" alt="AWS S3"/><br/><sub>AWS&nbsp;S3</sub> |
+| Docker | Jira | Confluence | Slack | AWS S3 | Google Cloud |
+|:------:|:----:|:-----------:|:-----:|:------:|:---:|
+| <img src="./docs/assets/icons/docker.svg" height="40" alt="Docker"/><br/><sub>Docker</sub> | <img src="./docs/assets/icons/jira.svg" height="40" alt="Jira"/><br/><sub>Jira</sub> | <img src="./docs/assets/icons/confluence.svg" height="40" alt="Confluence"/><br/><sub>Confluence</sub> | <img src="./docs/assets/icons/slack.svg" height="40" alt="Slack"/><br/><sub>Slack</sub> | <img src="./docs/assets/icons/aws-s3.svg" height="40" alt="AWS S3"/><br/><sub>AWS&nbsp;S3</sub> |  <img src="./docs/assets/icons/gcs.svg" height="40" alt="Google Cloud Storage"/><br/><sub>Cloud Storage</sub> |
 
 </div>
 
@@ -41,17 +42,27 @@ See ([docs/COMPARISON.md](docs/COMPARISON.md))
   <img src="docs/runtime-comparison.png" alt="Kingfisher Runtime Comparison" style="vertical-align: center;" />
 </p>
 
+# Table of Contents
+
+<details>
+
 - [Kingfisher](#kingfisher)
   - [Key Features](#key-features)
     - [Multiple Scan Targets](#multiple-scan-targets)
     - [Performance, Accuracy, and Hundreds of Rules](#performance-accuracy-and-hundreds-of-rules)
 - [Benchmark Results](#benchmark-results)
+- [Table of Contents](#table-of-contents)
 - [Getting Started](#getting-started)
   - [Installation](#installation)
-    - [Run Kingfisher in Docker](#run-kingfisher-in-docker)
+    - [Pre-built Releases](#pre-built-releases)
+    - [Homebrew](#homebrew)
+    - [Linux and macOS](#linux-and-macos)
+    - [Windows](#windows)
+    - [Compile](#compile)
+    - [ Run Kingfisher in Docker](#-run-kingfisher-in-docker)
 - [🔐 Detection Rules at a Glance](#-detection-rules-at-a-glance)
-  - [Write Custom Rules!](#write-custom-rules)
-- [Usage](#usage)
+  - [📝 Write Custom Rules!](#-write-custom-rules)
+- [🎉 Usage](#-usage)
   - [Basic Examples](#basic-examples)
     - [Scan with secret validation](#scan-with-secret-validation)
     - [Scan a directory containing multiple Git repositories](#scan-a-directory-containing-multiple-git-repositories)
@@ -67,6 +78,7 @@ See ([docs/COMPARISON.md](docs/COMPARISON.md))
     - [Exclude specific paths](#exclude-specific-paths)
     - [Scan changes in CI pipelines](#scan-changes-in-ci-pipelines)
   - [ Scanning an AWS S3 Bucket](#-scanning-an-aws-s3-bucket)
+  - [ Scanning a Google Cloud Storage Bucket](#-scanning-a-google-cloud-storage-bucket)
   - [ Scanning Docker Images](#-scanning-docker-images)
   - [ Scanning GitHub](#-scanning-github)
     - [Scan GitHub organization (requires `KF_GITHUB_TOKEN`)](#scan-github-organization-requires-kf_github_token)
@@ -79,8 +91,8 @@ See ([docs/COMPARISON.md](docs/COMPARISON.md))
     - [Scan remote GitLab repository by URL](#scan-remote-gitlab-repository-by-url)
     - [List GitLab repositories](#list-gitlab-repositories)
   - [ Scanning Azure Repos](#-scanning-azure-repos)
-    - [Scan Azure DevOps organization or collection (requires `KF_AZURE_TOKEN` or `KF_AZURE_PAT`)](#scan-azure-devops-organization-or-collection-requires-kf_azure_token-or-kf_azure_pat)
-    - [Scan specific Azure DevOps projects](#scan-specific-azure-devops-projects)
+    - [Scan Azure Repos organization or collection (requires `KF_AZURE_TOKEN` or `KF_AZURE_PAT`)](#scan-azure-repos-organization-or-collection-requires-kf_azure_token-or-kf_azure_pat)
+    - [Scan specific Azure Repos projects](#scan-specific-azure-repos-projects)
     - [Skip specific Azure repositories during enumeration](#skip-specific-azure-repositories-during-enumeration)
     - [List Azure repositories](#list-azure-repositories)
   - [ Scanning Gitea](#-scanning-gitea)
@@ -97,6 +109,12 @@ See ([docs/COMPARISON.md](docs/COMPARISON.md))
     - [List Bitbucket repositories](#list-bitbucket-repositories)
     - [Authenticate to Bitbucket](#authenticate-to-bitbucket)
     - [Self-hosted Bitbucket Server](#self-hosted-bitbucket-server)
+  - [ Scanning Hugging Face](#-scanning-hugging-face)
+    - [Scan Hugging Face user](#scan-hugging-face-user)
+    - [Scan Hugging Face organization](#scan-hugging-face-organization)
+    - [Scan specific Hugging Face resources](#scan-specific-hugging-face-resources)
+    - [List Hugging Face repositories](#list-hugging-face-repositories)
+    - [Authenticate to Hugging Face](#authenticate-to-hugging-face)
   - [ Scanning Jira](#-scanning-jira)
     - [Scan Jira issues matching a JQL query](#scan-jira-issues-matching-a-jql-query)
     - [Scan the last 1,000 Jira issues:](#scan-the-last-1000-jira-issues)
@@ -107,7 +125,7 @@ See ([docs/COMPARISON.md](docs/COMPARISON.md))
   - [Environment Variables for Tokens](#environment-variables-for-tokens)
   - [Exit Codes](#exit-codes)
   - [Update Checks](#update-checks)
-- [Advanced Options](#advanced-options)
+- [🤓 Advanced Options](#-advanced-options)
   - [Build a Baseline / Detect New Secrets](#build-a-baseline--detect-new-secrets)
   - [List Builtin Rules](#list-builtin-rules)
   - [To scan using **only** your own `my_rules.yaml` you could run:](#to-scan-using-only-your-own-my_rulesyaml-you-could-run)
@@ -117,6 +135,8 @@ See ([docs/COMPARISON.md](docs/COMPARISON.md))
   - [Notable Scan Options](#notable-scan-options)
   - [Understanding `--confidence`](#understanding---confidence)
     - [Ignore known false positives](#ignore-known-false-positives)
+    - [Skip Canary Tokens (AWS)](#skip-canary-tokens-aws)
+      - [Common CLI flows](#common-cli-flows)
     - [Inline ignore directives](#inline-ignore-directives)
   - [Finding Fingerprint](#finding-fingerprint)
   - [Rule Performance Profiling](#rule-performance-profiling)
@@ -125,36 +145,58 @@ See ([docs/COMPARISON.md](docs/COMPARISON.md))
 - [Roadmap](#roadmap)
 - [License](#license)
 
+</details>
+
+
 # Getting Started
 ## Installation
+### Pre-built Releases
+Pre-built binaries are available from the [Releases](https://github.com/mongodb/kingfisher/releases) section.
+
+### Homebrew
 
-On macOS, you can simply
 
 ```bash
 brew install kingfisher
 ```
 
-Pre-built binaries are also available on the [Releases](https://github.com/mongodb/kingfisher/releases) section of this page.
+### Linux and macOS
 
-You can also install using [ubi](https://github.com/houseabsolute/ubi), which downloads the correct binary for your platform:
+<details>
+
+You can easily install using [ubi](https://github.com/houseabsolute/ubi), which downloads the correct binary for your platform.
 
 ```bash
 # Linux, macOS
 curl --silent --location \
     https://raw.githubusercontent.com/houseabsolute/ubi/master/bootstrap/bootstrap-ubi.sh | \
     sh && \
-  ubi --project mongodb/kingfisher --in "$HOME/bin"
+  ubi --project mongodb/kingfisher --in "$HOME/.local/bin"
 ```
 
+This installs and runs `ubi` and then places the `kingfisher` executable in `~/.local/bin` on Unix-like systems.
+
+</details>
+
+### Windows
+
+<details>
+
+You can easily install using [ubi](https://github.com/houseabsolute/ubi), which downloads the correct binary for your platform.
+
 ```powershell
 # Windows
 powershell -exec bypass -c "Invoke-WebRequest -URI 'https://raw.githubusercontent.com/houseabsolute/ubi/master/bootstrap/bootstrap-ubi.ps1' -UseBasicParsing | Invoke-Expression" && ubi --project mongodb/kingfisher --in .
 ```
 
-This installs `ubi` and then places the `kingfisher` executable in `~/bin` on Unix-like
-systems (or the current directory on Windows).
+This installs and runs `ubi` and then places the `kingfisher` executable in the current directory on Windows.
+</details>
 
-Or you may compile for your platform via `make`:
+
+### Compile
+You may compile for your platform via `make`
+
+<details>
 
 ```bash
 # NOTE: Requires Docker
@@ -174,9 +216,14 @@ make darwin-all # builds both x64 and arm64
 make all # builds for every OS and architecture supported
 ```
 
-### Run Kingfisher in Docker
+</details>
+
+### <img src="./docs/assets/icons/docker.svg" height="40" style="vertical-align:text-bottom;" alt="Docker"/> Run Kingfisher in Docker
+
+Run the dockerized Kingfisher container
+
+<details>
 
-Run the dockerized Kingfisher container:
 ```bash
 # GitHub Container Registry 
 docker run --rm ghcr.io/mongodb/kingfisher:latest --version
@@ -233,6 +280,8 @@ docker run --rm \
 
 ```
 
+</details>
+
 # 🔐 Detection Rules at a Glance
 
 Kingfisher ships with [hundreds of rules](/data/rules/) that cover everything from classic cloud keys to the latest AI SaaS tokens. Below is an overview:
@@ -248,7 +297,7 @@ Kingfisher ships with [hundreds of rules](/data/rules/) that cover everything fr
 | **Security & DevSecOps** | Snyk, Dependency-Track, CodeClimate, Codacy, OpsGenie, PagerDuty, and more |
 | **Misc. SaaS & Tools** | 1Password, Adobe, Atlassian/Jira, Asana, Netlify, Baremetrics, and more |
 
-## Write Custom Rules!
+## 📝 Write Custom Rules!
 
 Kingfisher ships with hundreds of rules with HTTP and service‑specific validation checks (AWS, Azure, GCP, etc.) to confirm if a detected string is a live credential.
 
@@ -258,7 +307,7 @@ First, review [docs/RULES.md](/docs/RULES.md) to learn how to create custom King
 
 Once you've done that, you can provide your custom rules (defined in a YAML file) and provide it to Kingfisher at runtime --- no recompiling required!
 
-# Usage
+# 🎉 Usage
 
 ## Basic Examples
 
@@ -400,7 +449,7 @@ kingfisher scan ./my-project \
   -v
 ```
 
-## <img alt="GitHub" src="./docs/assets/icons/aws-s3.svg" width="20" height="20" style="vertical-align:text-bottom;"> Scanning an AWS S3 Bucket
+## <img alt="AWS S3" src="./docs/assets/icons/aws-s3.svg" width="40" height="40" style="vertical-align:text-bottom;"> Scanning an AWS S3 Bucket
 You can scan S3 objects directly:
 
 ```bash
@@ -415,7 +464,7 @@ Credential resolution happens in this order:
 
 If `--role-arn` is supplied, the credentials from steps 1–2 are used to assume that role.
 
-Examples:
+Examples
 
 ```bash
 # using explicit keys
@@ -452,7 +501,30 @@ docker run --rm \
     scan --s3-bucket bucket-name
 ```
 
-## <img alt="Docker" src="./docs/assets/icons/docker.svg" width="20" height="20" style="vertical-align:text-bottom;"> Scanning Docker Images
+## <img src="./docs/assets/icons/gcs.svg" height="40" width="40" alt="Google Cloud Storage"/> Scanning a Google Cloud Storage Bucket
+
+The `--gcs-bucket` flag streams objects directly from Google Cloud Storage. Authentication uses
+Application Default Credentials, so you can provide a service-account JSON file via the
+`GOOGLE_APPLICATION_CREDENTIALS` environment variable or by passing `--gcs-service-account`. Public
+buckets work without credentials.
+
+```bash
+kingfisher scan --gcs-bucket bucket-name
+
+# scan a sub-tree inside the bucket
+kingfisher scan --gcs-bucket bucket-name --gcs-prefix path/to/data/
+
+# supply a service-account key explicitly
+kingfisher scan --gcs-bucket bucket-name --gcs-service-account /path/to/key.json
+```
+
+Functional example:
+```bash
+kingfisher scan --gcs-bucket cloud-samples-data --gcs-prefix "storage/"
+```
+
+
+## <img alt="Docker" src="./docs/assets/icons/docker.svg" width="40" height="40" style="vertical-align:text-bottom;"> Scanning Docker Images
 
 Kingfisher will first try to use any locally available image, then fall back to pulling via OCI.  
 
@@ -482,7 +554,7 @@ kingfisher scan --docker-image some-private-registry.dkr.ecr.us-east-1.amazonaws
 kingfisher scan --docker-image private.registry.example.com/my-image:tag
 ```
 
-## <img alt="GitHub" src="./docs/assets/icons/github.svg" width="20" height="20" style="vertical-align:text-bottom;"> Scanning GitHub
+## <img alt="GitHub" src="./docs/assets/icons/github.svg" width="40" height="40" style="vertical-align:text-bottom;"> Scanning GitHub
 
 ### Scan GitHub organization (requires `KF_GITHUB_TOKEN`)
 
@@ -524,7 +596,7 @@ KF_GITHUB_TOKEN="ghp_…" kingfisher scan --git-url https://github.com/org/priva
 
 ---
 
-## <img alt="GitLab" src="./docs/assets/icons/gitlab.svg" width="20" height="20" style="vertical-align:text-bottom;"> Scanning GitLab
+## <img alt="GitLab" src="./docs/assets/icons/gitlab.svg" width="40" height="40" style="vertical-align:text-bottom;"> Scanning GitLab
 
 ### Scan GitLab group (requires `KF_GITLAB_TOKEN`)
 
@@ -580,18 +652,18 @@ kingfisher gitlab repos list --group my-group --include-subgroups
 # skip specific projects when listing or scanning (supports glob patterns)
 kingfisher gitlab repos list --group my-group --gitlab-exclude my-group/**/legacy-*
 ```
-## <img alt="Azure Repos" src="./docs/assets/icons/azure-devops.svg" width="20" height="20" style="vertical-align:text-bottom;"> Scanning Azure Repos
+## <img alt="Azure Repos" src="./docs/assets/icons/azure-devops.svg" width="40" height="40" style="vertical-align:text-bottom;"> Scanning Azure Repos
 
-### Scan Azure DevOps organization or collection (requires `KF_AZURE_TOKEN` or `KF_AZURE_PAT`)
+### Scan Azure Repos organization or collection (requires `KF_AZURE_TOKEN` or `KF_AZURE_PAT`)
 
 ```bash
 kingfisher scan --azure-organization my-org
 
-# Azure DevOps Server example
+# Self-hosted Azure DevOps Server example
 KF_AZURE_PAT="pat" kingfisher scan --azure-organization DefaultCollection --azure-base-url https://ado.internal.example/tfs/
 ```
 
-### Scan specific Azure DevOps projects
+### Scan specific Azure Repos projects
 
 Projects are specified as `ORGANIZATION/PROJECT`. Repeat the flag for multiple projects.
 
@@ -621,7 +693,7 @@ kingfisher azure repos list --project my-org/app --project my-org/api
 # skip specific repositories while listing (supports glob patterns)
 kingfisher azure repos list --organization my-org --azure-exclude my-org/**/experimental-*
 ```
-## <img alt="Gitea" src="./docs/assets/icons/gitea.svg" width="20" height="20" style="vertical-align:text-bottom;"> Scanning Gitea
+## <img alt="Gitea" src="./docs/assets/icons/gitea.svg" width="40" height="40" style="vertical-align:text-bottom;"> Scanning Gitea
 
 ### Scan Gitea organization (requires `KF_GITEA_TOKEN`)
 
@@ -673,7 +745,7 @@ KF_GITEA_TOKEN="gtoken" kingfisher gitea repos list --all-gitea-organizations
 # self-hosted example
 KF_GITEA_TOKEN="gtoken" kingfisher gitea repos list --user johndoe --gitea-api-url https://gitea.internal.example/api/v1/
 ```
-## <img alt="Bitbucket" src="./docs/assets/icons/bitbucket.svg" width="20" height="20" style="vertical-align:text-bottom;"> Scanning Bitbucket
+## <img alt="Bitbucket" src="./docs/assets/icons/bitbucket.svg" width="40" height="40" style="vertical-align:text-bottom;"> Scanning Bitbucket
 ### Scan Bitbucket workspace
 
 ```bash
@@ -745,7 +817,45 @@ Use `--bitbucket-api-url` to point Kingfisher at your server's REST endpoint, fo
 `https://bitbucket.example.com/rest/api/1.0/`. Provide credentials with
 `--bitbucket-username` and `--bitbucket-token`, and pass `--ignore-certs` when
 connecting to HTTP or otherwise insecure instances.
-## <img alt="Jira" src="./docs/assets/icons/jira.svg" width="20" height="20" style="vertical-align:text-bottom;"> Scanning Jira
+## <img src="./docs/assets/icons/huggingface.svg" height="40" width="40" alt="Hugging Face"/> Scanning Hugging Face
+
+Hugging Face hosts git repositories for models, datasets, and Spaces. Kingfisher can enumerate and scan all three resource types.
+
+### Scan Hugging Face user
+
+```bash
+kingfisher scan --huggingface-user <username>
+```
+
+### Scan Hugging Face organization
+
+```bash
+kingfisher scan --huggingface-organization <orgname>
+```
+
+### Scan specific Hugging Face resources
+
+Scan individual repositories by ID (owner/name) or by passing the full HTTPS URL:
+
+```bash
+kingfisher scan --huggingface-model <owner/model>
+kingfisher scan --huggingface-dataset https://huggingface.co/datasets/<owner>/<dataset>
+kingfisher scan --huggingface-space <owner/space>
+```
+
+Use `--huggingface-exclude` to omit results returned by user or organization enumeration. Prefix values with `model:`, `dataset:`, or `space:` when you only want to skip a specific resource type.
+
+### List Hugging Face repositories
+
+```bash
+kingfisher huggingface repos list --huggingface-user <username>
+```
+
+### Authenticate to Hugging Face
+
+Private repositories require an access token provided through the `KF_HUGGINGFACE_TOKEN` environment variable. For git authentication the helper also honours `KF_HUGGINGFACE_USERNAME` (default `hf_user`).
+
+## <img alt="Jira" src="./docs/assets/icons/jira.svg" width="40" height="40" style="vertical-align:text-bottom;"> Scanning Jira
 
 ### Scan Jira issues matching a JQL query
 
@@ -764,7 +874,7 @@ KF_JIRA_TOKEN="token" kingfisher scan \
   --max-results 1000
 ```
 
-## <img alt="Confluence" src="./docs/assets/icons/confluence.svg" width="20" height="20" style="vertical-align:text-bottom;"> Scanning Confluence
+## <img alt="Confluence" src="./docs/assets/icons/confluence.svg" width="40" height="40" style="vertical-align:text-bottom;"> Scanning Confluence
 ### Scan Confluence pages matching a CQL query
 
 ```bash
@@ -789,7 +899,7 @@ Generate a personal access token and set it in the `KF_CONFLUENCE_TOKEN` environ
 
 To use basic authentication instead, also set `KF_CONFLUENCE_USER` to your Confluence email address; Kingfisher will then send the username and `KF_CONFLUENCE_TOKEN` as a Basic auth header. If the server responds with a redirect to a login page, the credentials are invalid or lack the required permissions.
 
-## <img alt="Slack" src="./docs/assets/icons/slack.svg" width="20" height="20" style="vertical-align:text-bottom;"> Scanning Slack
+## <img alt="Slack" src="./docs/assets/icons/slack.svg" width="40" height="40" style="vertical-align:text-bottom;"> Scanning Slack
 ### Scan Slack messages matching a search query
 
 ```bash
@@ -811,11 +921,13 @@ KF_SLACK_TOKEN="xoxp-1234..." kingfisher scan \
 | `KF_GITLAB_TOKEN` | GitLab Personal Access Token |
 | `KF_GITEA_TOKEN` | Gitea Personal Access Token |
 | `KF_GITEA_USERNAME` | Username for private Gitea clones (used with `KF_GITEA_TOKEN`) |
-| `KF_AZURE_TOKEN` / `KF_AZURE_PAT` | Azure DevOps Personal Access Token |
-| `KF_AZURE_USERNAME` | Username to use with Azure DevOps PATs (defaults to `pat` when unset) |
+| `KF_AZURE_TOKEN` / `KF_AZURE_PAT` | Azure Repos Personal Access Token |
+| `KF_AZURE_USERNAME` | Username to use with Azure Repos PATs (defaults to `pat` when unset) |
 | `KF_BITBUCKET_USERNAME` | Bitbucket username for basic authentication |
 | `KF_BITBUCKET_APP_PASSWORD` / `KF_BITBUCKET_TOKEN` | Bitbucket app password or server token |
 | `KF_BITBUCKET_OAUTH_TOKEN` | Bitbucket OAuth or PAT token |
+| `KF_HUGGINGFACE_TOKEN` | Hugging Face access token for API enumeration and git cloning |
+| `KF_HUGGINGFACE_USERNAME` | Optional username for Hugging Face git operations (defaults to `hf_user`) |
 | `KF_JIRA_TOKEN`   | Jira API token               |
 | `KF_CONFLUENCE_TOKEN` | Confluence API token      |
 | `KF_SLACK_TOKEN`  | Slack API token              |
@@ -869,7 +981,7 @@ Kingfisher automatically queries GitHub for a newer release when it starts and t
 
 - **Disable version checks** – Pass `--no-update-check` to skip both the startup and shutdown checks entirely
 
-# Advanced Options
+# 🤓 Advanced Options
 
 ## Build a Baseline / Detect New Secrets
 
@@ -963,6 +1075,8 @@ leaves the default unchanged.
 - `--manage-baseline`: Create or update the baseline file with current findings
 - `--skip-regex <PATTERN>`: Ignore findings whose text matches this regex (repeatable)
 - `--skip-word <WORD>`: Ignore findings containing this case-insensitive word (repeatable)
+- `--skip-aws-account <ACCOUNT_ID>`: Skip live AWS validation for findings tied to the specified AWS account number (repeatable, accepts comma-separated lists)
+- `--skip-aws-account-file <FILE>`: Load AWS account numbers to skip from a file (one account per line; `#` comments allowed)
 - `--ignore-comment <DIRECTIVE>`: Honor additional inline directives from other scanners (repeatable; e.g. `--ignore-comment "gitleaks:allow"`)
 - `--no-ignore`: Disable inline directives entirely so every match is reported
 ## Understanding `--confidence`
@@ -996,6 +1110,66 @@ kingfisher scan \
 
 If a `--skip-regex` regular expression fails to compile, the scan aborts with an error so that typos are caught early.
 
+### Skip Canary Tokens (AWS)
+
+Canary/honey tokens are intentionally leaked credentials used to catch misuse. Kingfisher can **recognize and skip** known AWS canary accounts so hygiene scans don’t set off alerts.
+
+**How to skip**  
+Pass the 12-digit AWS account IDs for your canaries via `--skip-aws-account` (comma-separated) or `--skip-aws-account-file` (one ID per line; blank lines and `#` comments allowed). Kingfisher also ships with a **pre-seeded (but not exhaustive)** list of Thinkst Canary account IDs used by canarytokens.org, so many are skipped automatically.
+
+```bash
+kingfisher scan /path/to/code \
+  --skip-aws-account "171436882533,534261010715"
+
+# or combine preloaded canary IDs with a just-created decoy account
+printf '999900001111\n534261010715\n' > /tmp/canary_accounts.txt
+
+kingfisher scan /path/to/repo \
+  --skip-aws-account-file /tmp/canary_accounts.txt
+
+```
+
+**What you’ll see**  
+Findings tied to a skip-listed account report `Validation: Not Attempted`, and the `Response` line notes that the account came from the skip list:
+
+```text
+AWS SECRET ACCESS KEY => [KINGFISHER.AWS.2]
+ |Finding.......: <REDACTED>
+ |Fingerprint...: 2141074333616819500
+ |Confidence....: medium
+ |Entropy.......: 5.00
+ |Validation....: Not Attempted
+ |__Response....: (skip list entry) AWS validation not attempted for account 171436882533.
+ |Language......: Unknown
+ |Line Num......: 21
+ |Path..........: /tmp/test_canary_accounts.log
+```
+
+**Why this matters**
+Skipping prevents noisy tripwires in prod telemetry while keeping the status explicit—“Not Attempted” isn’t a pass. If needed, verify these credentials out-of-band or with a safe, non-triggering method.
+
+
+#### Common CLI flows
+
+```bash
+# Skip a few in-house canaries during a filesystem scan
+kingfisher scan repo/ \
+  --skip-aws-account "111122223333,444455556666"
+
+# Read a longer list from disk
+kingfisher scan repo/ \
+  --skip-aws-account-file /tmp/scripts/canary_accounts.txt
+
+# Combine preloaded canary IDs with a just-created decoy account
+printf '999900001111\n534261010715\n' > /tmp/new_canary.txt
+
+kingfisher scan /path/to/repo \
+  --skip-aws-account-file /tmp/new_canary.txt
+
+```
+
+Tip: if you manage multiple canary fleets (Thinkst, self-hosted alternatives, or bespoke decoys), keep the account IDs under version control alongside your infrastructure-as-code so security teams can rotate or expand the skip list without editing pipelines.
+
 ### Inline ignore directives
 
 Add `kingfisher:ignore` anywhere on the same line as a finding to silence it. Multi-line strings and PEM-style blocks may also be ignored by placing the directive on the closing delimiter line (for example, `"""  # kingfisher:ignore`), on the next logical line after the string, **or** on a comment immediately before the value:
diff --git a/docs/assets/icons/gcs.svg b/docs/assets/icons/gcs.svg
new file mode 100644
index 0000000..842c121
--- /dev/null
+++ b/docs/assets/icons/gcs.svg
@@ -0,0 +1,38 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg id="standard_product_icon" xmlns="http://www.w3.org/2000/svg" version="1.1" viewBox="0 0 512 512">
+  <!-- Generator: Adobe Illustrator 29.1.0, SVG Export Plug-In . SVG Version: 2.1.0 Build 142)  -->
+  <defs>
+    <style>
+      .st0 {
+        fill: none;
+      }
+
+      .st1 {
+        fill: #4285f4;
+      }
+
+      .st2 {
+        fill: #34a853;
+      }
+
+      .st3 {
+        fill: #fbbc04;
+      }
+
+      .st4 {
+        fill: #ea4335;
+      }
+    </style>
+  </defs>
+  <g id="bounding_box">
+    <rect class="st0" width="512" height="512"/>
+  </g>
+  <g id="art">
+    <path class="st2" d="M442,277.9H70c-8.8,0-16,7.2-16,16v148.1c0,8.8,7.2,16,16,16h107.2c1.5.5,3.1.7,4.8.7s3.3-.3,4.8-.7h255.2c8.8,0,16-7.2,16-16v-148.1c0-8.8-7.2-16-16-16ZM86,309.9h80v116.1h-80v-116.1ZM426,425.9h-228v-116.1h228v116.1Z"/>
+    <path class="st3" d="M442,54H70c-8.8,0-16,7.2-16,16v148.8c0,8.8,7.2,16,16,16h372c8.8,0,16-7.2,16-16V70c0-8.8-7.2-16-16-16ZM86,86h80v116.8h-80v-116.8ZM426,202.8h-228v-116.8h228v116.8Z"/>
+    <path class="st4" d="M442,234.8h-16V86H54v-16c0-8.8,7.2-16,16-16h372c8.8,0,16,7.2,16,16v148.8c0,8.8-7.2,16-16,16Z"/>
+    <path class="st1" d="M442,457.9h-16v-148.1H54v-16c0-8.8,7.2-16,16-16h372c8.8,0,16,7.2,16,16v148.1c0,8.8-7.2,16-16,16Z"/>
+    <circle class="st4" cx="349" cy="144.4" r="37"/>
+    <circle class="st1" cx="349" cy="367.9" r="37"/>
+  </g>
+</svg>
\ No newline at end of file
diff --git a/docs/assets/icons/huggingface.svg b/docs/assets/icons/huggingface.svg
new file mode 100644
index 0000000..43711df
--- /dev/null
+++ b/docs/assets/icons/huggingface.svg
@@ -0,0 +1,41 @@
+<svg xmlns="http://www.w3.org/2000/svg" width="95" height="88" fill="none">
+	<path
+		fill="#fff"
+		d="M94.25 70.08a8.28 8.28 0 0 1-.43 6.46 10.57 10.57 0 0 1-3 3.6 25.18 25.18 0 0 1-5.7 3.2 65.74 65.74 0 0 1-7.56 2.65 46.67 46.67 0 0 1-11.42 1.68c-5.42.05-10.09-1.23-13.4-4.5a40.4 40.4 0 0 1-10.14.03c-3.34 3.25-7.99 4.52-13.39 4.47a46.82 46.82 0 0 1-11.43-1.68 66.37 66.37 0 0 1-7.55-2.65c-2.28-.98-4.17-2-5.68-3.2a10.5 10.5 0 0 1-3.02-3.6c-.99-2-1.18-4.3-.42-6.46a8.54 8.54 0 0 1-.33-5.63c.25-.95.66-1.83 1.18-2.61a8.67 8.67 0 0 1 2.1-8.47 8.23 8.23 0 0 1 2.82-2.07 41.75 41.75 0 1 1 81.3-.12 8.27 8.27 0 0 1 3.11 2.19 8.7 8.7 0 0 1 2.1 8.47c.52.78.93 1.66 1.18 2.61a8.61 8.61 0 0 1-.32 5.63Z"
+	/>
+	<path fill="#FFD21E" d="M47.21 76.5a34.75 34.75 0 1 0 0-69.5 34.75 34.75 0 0 0 0 69.5Z" />
+	<path
+		fill="#FF9D0B"
+		d="M81.96 41.75a34.75 34.75 0 1 0-69.5 0 34.75 34.75 0 0 0 69.5 0Zm-73.5 0a38.75 38.75 0 1 1 77.5 0 38.75 38.75 0 0 1-77.5 0Z"
+	/>
+	<path
+		fill="#3A3B45"
+		d="M58.5 32.3c1.28.44 1.78 3.06 3.07 2.38a5 5 0 1 0-6.76-2.07c.61 1.15 2.55-.72 3.7-.32ZM34.95 32.3c-1.28.44-1.79 3.06-3.07 2.38a5 5 0 1 1 6.76-2.07c-.61 1.15-2.56-.72-3.7-.32Z"
+	/>
+	<path
+		fill="#FF323D"
+		d="M46.96 56.29c9.83 0 13-8.76 13-13.26 0-2.34-1.57-1.6-4.09-.36-2.33 1.15-5.46 2.74-8.9 2.74-7.19 0-13-6.88-13-2.38s3.16 13.26 13 13.26Z"
+	/>
+	<path
+		fill="#3A3B45"
+		fill-rule="evenodd"
+		d="M39.43 54a8.7 8.7 0 0 1 5.3-4.49c.4-.12.81.57 1.24 1.28.4.68.82 1.37 1.24 1.37.45 0 .9-.68 1.33-1.35.45-.7.89-1.38 1.32-1.25a8.61 8.61 0 0 1 5 4.17c3.73-2.94 5.1-7.74 5.1-10.7 0-2.34-1.57-1.6-4.09-.36l-.14.07c-2.31 1.15-5.39 2.67-8.77 2.67s-6.45-1.52-8.77-2.67c-2.6-1.29-4.23-2.1-4.23.29 0 3.05 1.46 8.06 5.47 10.97Z"
+		clip-rule="evenodd"
+	/>
+	<path
+		fill="#FF9D0B"
+		d="M70.71 37a3.25 3.25 0 1 0 0-6.5 3.25 3.25 0 0 0 0 6.5ZM24.21 37a3.25 3.25 0 1 0 0-6.5 3.25 3.25 0 0 0 0 6.5ZM17.52 48c-1.62 0-3.06.66-4.07 1.87a5.97 5.97 0 0 0-1.33 3.76 7.1 7.1 0 0 0-1.94-.3c-1.55 0-2.95.59-3.94 1.66a5.8 5.8 0 0 0-.8 7 5.3 5.3 0 0 0-1.79 2.82c-.24.9-.48 2.8.8 4.74a5.22 5.22 0 0 0-.37 5.02c1.02 2.32 3.57 4.14 8.52 6.1 3.07 1.22 5.89 2 5.91 2.01a44.33 44.33 0 0 0 10.93 1.6c5.86 0 10.05-1.8 12.46-5.34 3.88-5.69 3.33-10.9-1.7-15.92-2.77-2.78-4.62-6.87-5-7.77-.78-2.66-2.84-5.62-6.25-5.62a5.7 5.7 0 0 0-4.6 2.46c-1-1.26-1.98-2.25-2.86-2.82A7.4 7.4 0 0 0 17.52 48Zm0 4c.51 0 1.14.22 1.82.65 2.14 1.36 6.25 8.43 7.76 11.18.5.92 1.37 1.31 2.14 1.31 1.55 0 2.75-1.53.15-3.48-3.92-2.93-2.55-7.72-.68-8.01.08-.02.17-.02.24-.02 1.7 0 2.45 2.93 2.45 2.93s2.2 5.52 5.98 9.3c3.77 3.77 3.97 6.8 1.22 10.83-1.88 2.75-5.47 3.58-9.16 3.58-3.81 0-7.73-.9-9.92-1.46-.11-.03-13.45-3.8-11.76-7 .28-.54.75-.76 1.34-.76 2.38 0 6.7 3.54 8.57 3.54.41 0 .7-.17.83-.6.79-2.85-12.06-4.05-10.98-8.17.2-.73.71-1.02 1.44-1.02 3.14 0 10.2 5.53 11.68 5.53.11 0 .2-.03.24-.1.74-1.2.33-2.04-4.9-5.2-5.21-3.16-8.88-5.06-6.8-7.33.24-.26.58-.38 1-.38 3.17 0 10.66 6.82 10.66 6.82s2.02 2.1 3.25 2.1c.28 0 .52-.1.68-.38.86-1.46-8.06-8.22-8.56-11.01-.34-1.9.24-2.85 1.31-2.85Z"
+	/>
+	<path
+		fill="#FFD21E"
+		d="M38.6 76.69c2.75-4.04 2.55-7.07-1.22-10.84-3.78-3.77-5.98-9.3-5.98-9.3s-.82-3.2-2.69-2.9c-1.87.3-3.24 5.08.68 8.01 3.91 2.93-.78 4.92-2.29 2.17-1.5-2.75-5.62-9.82-7.76-11.18-2.13-1.35-3.63-.6-3.13 2.2.5 2.79 9.43 9.55 8.56 11-.87 1.47-3.93-1.71-3.93-1.71s-9.57-8.71-11.66-6.44c-2.08 2.27 1.59 4.17 6.8 7.33 5.23 3.16 5.64 4 4.9 5.2-.75 1.2-12.28-8.53-13.36-4.4-1.08 4.11 11.77 5.3 10.98 8.15-.8 2.85-9.06-5.38-10.74-2.18-1.7 3.21 11.65 6.98 11.76 7.01 4.3 1.12 15.25 3.49 19.08-2.12Z"
+	/>
+	<path
+		fill="#FF9D0B"
+		d="M77.4 48c1.62 0 3.07.66 4.07 1.87a5.97 5.97 0 0 1 1.33 3.76 7.1 7.1 0 0 1 1.95-.3c1.55 0 2.95.59 3.94 1.66a5.8 5.8 0 0 1 .8 7 5.3 5.3 0 0 1 1.78 2.82c.24.9.48 2.8-.8 4.74a5.22 5.22 0 0 1 .37 5.02c-1.02 2.32-3.57 4.14-8.51 6.1-3.08 1.22-5.9 2-5.92 2.01a44.33 44.33 0 0 1-10.93 1.6c-5.86 0-10.05-1.8-12.46-5.34-3.88-5.69-3.33-10.9 1.7-15.92 2.78-2.78 4.63-6.87 5.01-7.77.78-2.66 2.83-5.62 6.24-5.62a5.7 5.7 0 0 1 4.6 2.46c1-1.26 1.98-2.25 2.87-2.82A7.4 7.4 0 0 1 77.4 48Zm0 4c-.51 0-1.13.22-1.82.65-2.13 1.36-6.25 8.43-7.76 11.18a2.43 2.43 0 0 1-2.14 1.31c-1.54 0-2.75-1.53-.14-3.48 3.91-2.93 2.54-7.72.67-8.01a1.54 1.54 0 0 0-.24-.02c-1.7 0-2.45 2.93-2.45 2.93s-2.2 5.52-5.97 9.3c-3.78 3.77-3.98 6.8-1.22 10.83 1.87 2.75 5.47 3.58 9.15 3.58 3.82 0 7.73-.9 9.93-1.46.1-.03 13.45-3.8 11.76-7-.29-.54-.75-.76-1.34-.76-2.38 0-6.71 3.54-8.57 3.54-.42 0-.71-.17-.83-.6-.8-2.85 12.05-4.05 10.97-8.17-.19-.73-.7-1.02-1.44-1.02-3.14 0-10.2 5.53-11.68 5.53-.1 0-.19-.03-.23-.1-.74-1.2-.34-2.04 4.88-5.2 5.23-3.16 8.9-5.06 6.8-7.33-.23-.26-.57-.38-.98-.38-3.18 0-10.67 6.82-10.67 6.82s-2.02 2.1-3.24 2.1a.74.74 0 0 1-.68-.38c-.87-1.46 8.05-8.22 8.55-11.01.34-1.9-.24-2.85-1.31-2.85Z"
+	/>
+	<path
+		fill="#FFD21E"
+		d="M56.33 76.69c-2.75-4.04-2.56-7.07 1.22-10.84 3.77-3.77 5.97-9.3 5.97-9.3s.82-3.2 2.7-2.9c1.86.3 3.23 5.08-.68 8.01-3.92 2.93.78 4.92 2.28 2.17 1.51-2.75 5.63-9.82 7.76-11.18 2.13-1.35 3.64-.6 3.13 2.2-.5 2.79-9.42 9.55-8.55 11 .86 1.47 3.92-1.71 3.92-1.71s9.58-8.71 11.66-6.44c2.08 2.27-1.58 4.17-6.8 7.33-5.23 3.16-5.63 4-4.9 5.2.75 1.2 12.28-8.53 13.36-4.4 1.08 4.11-11.76 5.3-10.97 8.15.8 2.85 9.05-5.38 10.74-2.18 1.69 3.21-11.65 6.98-11.76 7.01-4.31 1.12-15.26 3.49-19.08-2.12Z"
+	/>
+</svg>
diff --git a/src/cli/commands/huggingface.rs b/src/cli/commands/huggingface.rs
new file mode 100644
index 0000000..f0879ab
--- /dev/null
+++ b/src/cli/commands/huggingface.rs
@@ -0,0 +1,73 @@
+use clap::{Args, Subcommand};
+
+use crate::cli::commands::output::OutputArgs;
+
+use super::github::GitHubOutputFormat;
+
+/// Top-level Hugging Face command group
+#[derive(Args, Debug)]
+pub struct HuggingFaceArgs {
+    #[command(subcommand)] // the next CLI token selects a `HuggingFaceCommand` variant
+    pub command: HuggingFaceCommand,
+}
+
+#[derive(Subcommand, Debug)]
+pub enum HuggingFaceCommand {
+    /// Interact with Hugging Face repositories
+    #[command(subcommand)] // `Repos` nests a further subcommand enum instead of taking flags
+    Repos(HuggingFaceReposCommand),
+}
+
+#[derive(Subcommand, Debug)]
+pub enum HuggingFaceReposCommand {
+    /// List Hugging Face repositories
+    List(HuggingFaceReposListArgs), // repository selectors plus output options
+}
+
+#[derive(Args, Debug, Clone)]
+pub struct HuggingFaceReposListArgs {
+    #[command(flatten)] // the `--huggingface-*` selector flags are inlined into this command
+    pub repo_specifiers: HuggingFaceRepoSpecifiers,
+
+    #[command(flatten)] // shared output options, parameterized by the Hugging Face format alias
+    pub output_args: OutputArgs<HuggingFaceOutputFormat>,
+}
+
+#[derive(Args, Debug, Clone, Default)]
+pub struct HuggingFaceRepoSpecifiers {
+    /// Models, datasets, and Spaces owned by these users
+    #[arg(long = "huggingface-user")]
+    pub user: Vec<String>,
+
+    /// Models, datasets, and Spaces owned by these organizations
+    #[arg(long = "huggingface-organization", alias = "huggingface-org")]
+    pub organization: Vec<String>,
+
+    /// Specific models to scan (format: owner/name or full URL)
+    #[arg(long = "huggingface-model")]
+    pub model: Vec<String>,
+
+    /// Specific datasets to scan (format: owner/name or full URL)
+    #[arg(long = "huggingface-dataset")]
+    pub dataset: Vec<String>,
+
+    /// Specific Spaces to scan (format: owner/name or full URL)
+    #[arg(long = "huggingface-space")]
+    pub space: Vec<String>,
+
+    /// Skip specific repositories during enumeration (accepts optional prefixes like model:, dataset:, or space:)
+    #[arg(long = "huggingface-exclude", value_name = "IDENTIFIER")]
+    pub exclude: Vec<String>,
+}
+
+impl HuggingFaceRepoSpecifiers {
+    /// Returns `true` when no user, organization, model, dataset, or Space was
+    /// given. The `exclude` list is not consulted.
+    pub fn is_empty(&self) -> bool {
+        // Every selector list must be empty for the whole set to count as empty.
+        let selectors = [&self.user, &self.organization, &self.model, &self.dataset, &self.space];
+        selectors.iter().all(|values| values.is_empty())
+    }
+}
+
+pub type HuggingFaceOutputFormat = GitHubOutputFormat; // reuses the GitHub command's output-format type
diff --git a/src/cli/commands/inputs.rs b/src/cli/commands/inputs.rs
index 4bab9d1..a41cf82 100644
--- a/src/cli/commands/inputs.rs
+++ b/src/cli/commands/inputs.rs
@@ -28,6 +28,11 @@ pub struct InputSpecifierArgs {
             "gitlab_group",
             "gitea_user",
             "gitea_organization",
+            "huggingface_user",
+            "huggingface_organization",
+            "huggingface_model",
+            "huggingface_dataset",
+            "huggingface_space",
             "bitbucket_user",
             "bitbucket_workspace",
             "bitbucket_project",
@@ -43,7 +48,8 @@ pub struct InputSpecifierArgs {
             "confluence_url",
             "docker_image",
             "slack_query",
-            "s3_bucket"
+            "s3_bucket",
+            "gcs_bucket"
         ]),
         num_args = 0..,
         value_hint = ValueHint::AnyPath
@@ -120,6 +126,30 @@ pub struct InputSpecifierArgs {
     #[arg(long, alias = "include-subgroups")]
     pub gitlab_include_subgroups: bool,
 
+    /// Scan models, datasets, and Spaces belonging to the specified Hugging Face users
+    #[arg(long = "huggingface-user")]
+    pub huggingface_user: Vec<String>,
+
+    /// Scan models, datasets, and Spaces belonging to the specified Hugging Face organizations
+    #[arg(long = "huggingface-organization", alias = "huggingface-org")]
+    pub huggingface_organization: Vec<String>,
+
+    /// Scan a specific Hugging Face model (format: owner/name or full URL)
+    #[arg(long = "huggingface-model")]
+    pub huggingface_model: Vec<String>,
+
+    /// Scan a specific Hugging Face dataset (format: owner/name or full URL)
+    #[arg(long = "huggingface-dataset")]
+    pub huggingface_dataset: Vec<String>,
+
+    /// Scan a specific Hugging Face Space (format: owner/name or full URL)
+    #[arg(long = "huggingface-space")]
+    pub huggingface_space: Vec<String>,
+
+    /// Skip specific Hugging Face repositories during enumeration (accepts optional prefixes like model:, dataset:, or space:)
+    #[arg(long = "huggingface-exclude", value_name = "IDENTIFIER")]
+    pub huggingface_exclude: Vec<String>,
+
     // Gitea Options
     /// Scan repositories belonging to the specified Gitea user
     #[arg(long)]
@@ -256,6 +286,18 @@ pub struct InputSpecifierArgs {
     #[arg(long, requires = "s3_bucket")]
     pub aws_local_profile: Option<String>,
 
+    /// Scan the specified Google Cloud Storage bucket
+    #[arg(long)]
+    pub gcs_bucket: Option<String>,
+
+    /// Optional prefix within the GCS bucket
+    #[arg(long, requires = "gcs_bucket")]
+    pub gcs_prefix: Option<String>,
+
+    /// Path to a service account JSON file for GCS authentication
+    #[arg(long, value_hint = ValueHint::FilePath, requires = "gcs_bucket")]
+    pub gcs_service_account: Option<PathBuf>,
+
     /// Docker/OCI images to scan (no local Docker required)
     #[arg(long = "docker-image")]
     pub docker_image: Vec<String>,
@@ -299,7 +341,6 @@ pub struct ContentFilteringArgs {
     #[arg(
         long = "max-file-size",
         visible_alias = "max-filesize",      // also show in --help
-        // alias = "max-filesize",            // use this instead if you DON’T want it shown in --help
         default_value_t = 256.0,
         value_name = "MB"
     )]
diff --git a/src/cli/commands/mod.rs b/src/cli/commands/mod.rs
index 0434af9..4b25b89 100644
--- a/src/cli/commands/mod.rs
+++ b/src/cli/commands/mod.rs
@@ -3,6 +3,7 @@ pub mod bitbucket;
 pub mod gitea;
 pub mod github;
 pub mod gitlab;
+pub mod huggingface;
 pub mod inputs;
 pub mod output;
 pub mod rules;
diff --git a/src/cli/commands/scan.rs b/src/cli/commands/scan.rs
index 78312d8..f4d7921 100644
--- a/src/cli/commands/scan.rs
+++ b/src/cli/commands/scan.rs
@@ -1,4 +1,5 @@
 use clap::{Args, ValueEnum};
+use std::path::PathBuf;
 use strum::Display;
 use tracing::debug;
 
@@ -119,6 +120,14 @@ pub struct ScanArgs {
     #[arg(long = "skip-word", value_name = "WORD")]
     pub skip_word: Vec<String>,
 
+    /// AWS account IDs whose findings should skip live credential validation (repeatable)
+    #[arg(long = "skip-aws-account", value_name = "ACCOUNT_ID", value_delimiter = ',')]
+    pub skip_aws_account: Vec<String>,
+
+    /// File containing AWS account IDs to skip (one per line, `#` comments ignored)
+    #[arg(long = "skip-aws-account-file", value_name = "FILE")]
+    pub skip_aws_account_file: Option<PathBuf>,
+
     /// Additional inline ignore directives to recognise (repeatable)
     #[arg(long = "ignore-comment", value_name = "DIRECTIVE")]
     pub extra_ignore_comments: Vec<String>,
diff --git a/src/cli/global.rs b/src/cli/global.rs
index a03d3d4..3c2fe3f 100644
--- a/src/cli/global.rs
+++ b/src/cli/global.rs
@@ -8,7 +8,7 @@ use tracing::Level;
 
 use crate::cli::commands::{
     azure::AzureArgs, bitbucket::BitbucketArgs, gitea::GiteaArgs, github::GitHubArgs,
-    gitlab::GitLabArgs, rules::RulesArgs, scan::ScanArgs,
+    gitlab::GitLabArgs, huggingface::HuggingFaceArgs, rules::RulesArgs, scan::ScanArgs,
 };
 
 #[deny(missing_docs)]
@@ -81,6 +81,10 @@ pub enum Command {
     #[command(name = "azure")]
     Azure(AzureArgs),
 
+    /// Interact with the Hugging Face Hub
+    #[command(name = "huggingface")]
+    HuggingFace(HuggingFaceArgs),
+
     /// Manage rules
     #[command(alias = "rule")]
     Rules(RulesArgs),
diff --git a/src/gcs.rs b/src/gcs.rs
new file mode 100644
index 0000000..dc346fd
--- /dev/null
+++ b/src/gcs.rs
@@ -0,0 +1,96 @@
+use std::path::Path;
+
+use anyhow::{Context, Result};
+use gcloud_storage::{
+    client::{google_cloud_auth::credentials::CredentialsFile, Client, ClientConfig},
+    http::objects::{
+        download::Range,
+        get::GetObjectRequest,
+        list::{ListObjectsRequest, ListObjectsResponse},
+    },
+};
+use tracing::debug;
+
+/// Visit every object in the given GCS bucket, optionally filtered by prefix.
+///
+/// Credentials are read from `service_account_path` when given, otherwise via
+/// `with_auth` (Application Default Credentials). On auth failure the client
+/// falls back to anonymous access unless a path or a credentials env var is set.
+pub async fn visit_bucket_objects<F>(
+    bucket: &str,
+    prefix: Option<&str>,
+    service_account_path: Option<&Path>,
+    mut visitor: F,
+) -> Result<()>
+where
+    F: FnMut(String, Vec<u8>) -> Result<()>, // called with (object name, downloaded bytes)
+{
+    let config_result = if let Some(path) = service_account_path {
+        let credentials = CredentialsFile::new_from_file(path.to_string_lossy().into_owned())
+            .await
+            .with_context(|| {
+                format!("Failed to read GCS service account credentials from {}", path.display())
+            })?;
+
+        ClientConfig::default().with_credentials(credentials).await
+    } else {
+        ClientConfig::default().with_auth().await
+    };
+
+    let config = match config_result {
+        Ok(config) => config,
+        Err(err) => {
+            if service_account_path.is_some() // credentials were configured explicitly: surface the error
+                || std::env::var("GOOGLE_APPLICATION_CREDENTIALS").is_ok()
+                || std::env::var("GOOGLE_APPLICATION_CREDENTIALS_JSON").is_ok()
+            {
+                return Err(err)
+                    .context("Failed to authenticate with GCS using provided credentials");
+            }
+            debug!("Falling back to anonymous GCS access: {err}");
+            ClientConfig::default().anonymous() // unauthenticated client, so public buckets still work
+        }
+    };
+
+    let client = Client::new(config);
+    let mut page_token: Option<String> = None; // `None` requests the first listing page
+
+    loop { // one iteration per listing page
+        let request = ListObjectsRequest {
+            bucket: bucket.to_string(),
+            prefix: prefix.map(|p| p.to_string()),
+            page_token: page_token.clone(),
+            ..ListObjectsRequest::default()
+        };
+
+        let mut response: ListObjectsResponse = client
+            .list_objects(&request)
+            .await
+            .with_context(|| format!("Failed to list objects in bucket {bucket}"))?;
+
+        if let Some(items) = response.items.take() {
+            for object in items.into_iter().filter(|o| !o.name.is_empty()) { // entries without a name are skipped
+                let data = client
+                    .download_object(
+                        &GetObjectRequest {
+                            bucket: bucket.to_string(),
+                            object: object.name.clone(),
+                            ..GetObjectRequest::default()
+                        },
+                        &Range::default(), // NOTE(review): presumably the whole object — confirm against gcloud_storage
+                    )
+                    .await
+                    .with_context(|| format!("Failed to fetch object {}", object.name))?;
+
+                visitor(object.name, data)?; // a visitor error aborts the whole walk
+            }
+        }
+
+        match response.next_page_token {
+            Some(token) if !token.is_empty() => page_token = Some(token), // more pages to fetch
+            _ => break,
+        }
+    }
+
+    Ok(())
+}
diff --git a/src/git_binary.rs b/src/git_binary.rs
index 82fd990..a629373 100644
--- a/src/git_binary.rs
+++ b/src/git_binary.rs
@@ -40,6 +40,15 @@ const AZURE_CREDENTIAL_HELPER: &str = r#"credential.helper=!_azcreds() {
     fi
 }; _azcreds"#;
 
+const HUGGINGFACE_CREDENTIAL_HELPER: &str = r#"credential.helper=!_hfcreds() {
+    token="$KF_HUGGINGFACE_TOKEN";
+    if [ -n "$token" ]; then
+        user="${KF_HUGGINGFACE_USERNAME:-hf_user}";
+        echo username="$user";
+        echo password="$token";
+    fi
+}; _hfcreds"#; // inline git credential helper: prints username/password only when KF_HUGGINGFACE_TOKEN is non-empty
+
 /// Represents errors that can occur when interacting with the `git` CLI.
 #[derive(Debug, thiserror::Error)]
 pub enum GitError {
@@ -47,14 +56,37 @@ pub enum GitError {
     IOError(#[from] std::io::Error),
 
     #[error(
-        "git execution failed\ncode={}\nstdout=```\n{}```\nstderr=```\n{}```",
-        .status,
-        String::from_utf8_lossy(.stdout),
-        String::from_utf8_lossy(.stderr)
+        "git execution failed (status: {status}){summary}",
+        status = format_exit_status(.status),
+        summary = format_git_error_summary(.stdout.as_slice(), .stderr.as_slice())
     )]
     GitError { stdout: Vec<u8>, stderr: Vec<u8>, status: ExitStatus },
 }
 
+fn format_exit_status(status: &ExitStatus) -> String { // exit code if any, else `Display` text
+    status.code().map_or_else(|| status.to_string(), |code| code.to_string())
+}
+
+/// Builds the `: <stderr line> | <stdout line>` suffix for a failed git run, stderr first.
+/// A stream with no non-blank line is left out; returns "" when neither stream has one.
+fn format_git_error_summary(stdout: &[u8], stderr: &[u8]) -> String {
+    let parts: Vec<String> =
+        [stderr, stdout].iter().copied().filter_map(summarize_output).collect();
+    if parts.is_empty() {
+        return String::new();
+    }
+    format!(": {}", parts.join(" | "))
+}
+
+/// Returns the first `fatal:`/`error:` line of `output`, else its first non-blank line;
+/// git prints progress (e.g. `Cloning into ...`) to stderr before the real failure.
+fn summarize_output(output: &[u8]) -> Option<String> {
+    let text = String::from_utf8_lossy(output);
+    let lines: Vec<&str> = text.lines().map(str::trim).filter(|l| !l.is_empty()).collect();
+    let diagnostic = lines.iter().find(|l| l.starts_with("fatal:") || l.starts_with("error:"));
+    diagnostic.or_else(|| lines.first()).map(|line| (*line).to_owned())
+}
+
 /// A helper struct for running `git` commands.
 ///
 /// It supports optional GitHub, GitLab, Gitea, and Bitbucket credentials passed via
@@ -91,6 +123,8 @@ impl Git {
         let has_azure_token = ["KF_AZURE_TOKEN", "KF_AZURE_PAT"]
             .iter()
             .any(|key| matches!(std::env::var(key), Ok(value) if !value.is_empty()));
+        let has_huggingface_token =
+            matches!(std::env::var("KF_HUGGINGFACE_TOKEN"), Ok(value) if !value.is_empty());
 
         // If credentials are provided via environment variables, clear existing helpers first.
         if has_github_token
@@ -98,6 +132,7 @@ impl Git {
             || has_gitea_token
             || has_bitbucket_credentials
             || has_azure_token
+            || has_huggingface_token
         {
             credentials.push("-c".into());
             credentials.push(r#"credential.helper="#.into());
@@ -136,6 +171,11 @@ impl Git {
             credentials.push(AZURE_CREDENTIAL_HELPER.into());
         }
 
+        if has_huggingface_token {
+            credentials.push("-c".into());
+            credentials.push(HUGGINGFACE_CREDENTIAL_HELPER.into());
+        }
+
         Self { credentials, ignore_certs }
     }
 
diff --git a/src/huggingface.rs b/src/huggingface.rs
new file mode 100644
index 0000000..42bbf03
--- /dev/null
+++ b/src/huggingface.rs
@@ -0,0 +1,636 @@
+use std::{collections::HashSet, env, time::Duration};
+
+use anyhow::{anyhow, Result};
+use indicatif::{ProgressBar, ProgressStyle};
+use reqwest::{header::LINK, StatusCode, Url};
+use serde::Deserialize;
+use serde_json::Value;
+use tracing::{debug, warn};
+
+use crate::{git_url::GitUrl, validation::GLOBAL_USER_AGENT};
+
+/// Hugging Face repository selectors (owners and specific repos) plus an exclusion list.
+#[derive(Debug, Clone, Default)]
+pub struct RepoSpecifiers {
+    pub user: Vec<String>,
+    pub organization: Vec<String>,
+    pub model: Vec<String>,
+    pub dataset: Vec<String>,
+    pub space: Vec<String>,
+    pub exclude: Vec<String>,
+}
+
+impl RepoSpecifiers {
+    /// Returns `true` when no user, organization, model, dataset, or Space was
+    /// given. The `exclude` list is not consulted.
+    pub fn is_empty(&self) -> bool {
+        let selectors = [&self.user, &self.organization, &self.model, &self.dataset, &self.space];
+        selectors.iter().all(|values| values.is_empty())
+    }
+}
+
+/// Optional Hugging Face access token applied to outgoing requests.
+#[derive(Clone, Default)]
+pub struct AuthConfig {
+    token: Option<String>,
+}
+
+// Hand-written so the token is never printed in full: debug output shows only
+// its first four characters followed by `…`.
+impl std::fmt::Debug for AuthConfig {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // Keep at most four leading characters, then an ellipsis.
+        let preview =
+            self.token.as_ref().map(|t| format!("{}…", t.chars().take(4).collect::<String>()));
+        f.debug_struct("AuthConfig").field("token", &preview).finish()
+    }
+}
+
+impl AuthConfig {
+    /// Reads `KF_HUGGINGFACE_TOKEN` from the environment. An unset or
+    /// whitespace-only value is treated as "no token".
+    pub fn from_env() -> Self {
+        Self { token: env::var("KF_HUGGINGFACE_TOKEN").ok().filter(|t| !t.trim().is_empty()) }
+    }
+
+    /// Attaches the token as a bearer credential; without one the request is returned as-is.
+    fn apply(&self, request: reqwest::RequestBuilder) -> reqwest::RequestBuilder {
+        match self.token.as_deref() {
+            Some(token) => request.bearer_auth(token),
+            None => request,
+        }
+    }
+
+    /// Whether a token is present.
+    fn has_token(&self) -> bool {
+        self.token.is_some()
+    }
+}
+
+/// The three kinds of Hugging Face repository this module distinguishes.
+#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
+enum ResourceKind {
+    Model,
+    Dataset,
+    Space,
+}
+
+impl ResourceKind {
+    /// Plural lowercase name (NOTE(review): presumably the Hub API path segment — confirm).
+    fn api_path(self) -> &'static str {
+        match self {
+            Self::Model => "models",
+            Self::Dataset => "datasets",
+            Self::Space => "spaces",
+        }
+    }
+
+    /// HTTPS clone URL for `slug`: models sit at the site root, datasets and
+    /// Spaces under `datasets/` and `spaces/`.
+    fn git_url(self, slug: &str) -> String {
+        let namespace = match self {
+            Self::Model => "",
+            Self::Dataset => "datasets/",
+            Self::Space => "spaces/",
+        };
+        format!("https://huggingface.co/{namespace}{slug}.git")
+    }
+
+    /// Singular lowercase tag used as the `<kind>:` prefix of canonical keys.
+    fn canonical_prefix(self) -> &'static str {
+        match self {
+            Self::Model => "model",
+            Self::Dataset => "dataset",
+            Self::Space => "space",
+        }
+    }
+
+    /// Singular display name; the same strings as `canonical_prefix`.
+    fn display_name_singular(self) -> &'static str {
+        self.canonical_prefix()
+    }
+
+    /// Plural display name; the same strings as `api_path`.
+    fn display_name_plural(self) -> &'static str {
+        self.api_path()
+    }
+}
+
+#[derive(Debug, Clone, Eq, PartialEq, Hash)]
+struct ResourceRef { // one repository, identified by its kind and slug
+    kind: ResourceKind,
+    slug: String, // stored as given; lowercased only when building the canonical key
+}
+
+impl ResourceRef {
+    fn new(kind: ResourceKind, slug: String) -> Self { // plain constructor, no validation
+        Self { kind, slug }
+    }
+
+    fn canonical_key(&self) -> String { // "<kind prefix>:<lowercased slug>"
+        format!("{}:{}", self.kind.canonical_prefix(), self.slug.to_lowercase())
+    }
+
+    fn git_url(&self) -> String { // clone URL for this slug, as built by the kind
+        self.kind.git_url(&self.slug)
+    }
+}
+
+/// A deserialized item that carries its identifier under either an `id` or a `modelId` key.
+#[derive(Debug, Deserialize)]
+#[serde(untagged)]
+enum HuggingFaceItem {
+    Id {
+        id: String,
+    },
+    ModelId {
+        #[serde(rename = "modelId")]
+        model_id: String,
+    },
+}
+
+impl HuggingFaceItem {
+    /// Consumes the item and returns the identifier, whichever key supplied it.
+    fn into_identifier(self) -> String {
+        let (Self::Id { id } | Self::ModelId { model_id: id }) = self;
+        id
+    }
+}
+
+#[derive(Default)]
+struct ExcludeSet {
+    typed: HashSet<String>,   // `<kind>:<slug>` keys, slug lowercased; match one kind only
+    untyped: HashSet<String>, // lowercased slugs; match the slug for every kind
+}
+
+impl ExcludeSet {
+    /// Parses `kind:owner/name`, `kind:<url>`, `owner/name`, and bare `http(s)://` URLs.
+    /// Bare URLs are checked first: the ':' in their scheme is not a `kind:` separator,
+    /// so they are stored as untyped exclusions built from the URL path.
+    /// Blank entries are skipped silently; invalid ones are logged via `warn!` and skipped.
+    fn from_list(values: &[String]) -> Self {
+        let mut set = Self::default();
+        for raw in values {
+            let trimmed = raw.trim();
+            if trimmed.is_empty() {
+                continue;
+            }
+            let parsed = if trimmed.starts_with("http://") || trimmed.starts_with("https://") {
+                let slug = Url::parse(trimmed).ok().and_then(|u| normalize_untyped_slug(u.path()));
+                slug.map(|slug| (false, slug)).ok_or("expected owner/name")
+            } else if let Some((prefix, rest)) = trimmed.split_once(':') {
+                normalize_kind(prefix).ok_or("unknown type").and_then(|kind| {
+                    let slug = parse_slug_for_kind(kind, rest).ok_or("expected owner/name")?;
+                    Ok((true, format!("{}:{}", kind.canonical_prefix(), slug.to_lowercase())))
+                })
+            } else {
+                normalize_untyped_slug(trimmed).map(|s| (false, s)).ok_or("expected owner/name")
+            };
+            match parsed {
+                Ok((is_typed, key)) => {
+                    let bucket = if is_typed { &mut set.typed } else { &mut set.untyped };
+                    bucket.insert(key);
+                }
+                Err(reason) => warn!("Ignoring invalid Hugging Face exclusion '{raw}' ({reason})"),
+            }
+        }
+        set
+    }
+
+    /// True when `slug` (compared case-insensitively) is excluded for `kind` or for all kinds.
+    fn should_exclude(&self, kind: ResourceKind, slug: &str) -> bool {
+        let slug = slug.to_lowercase();
+        self.untyped.contains(&slug)
+            || self.typed.contains(&format!("{}:{}", kind.canonical_prefix(), slug))
+    }
+}
+
+/// Resolves a kind name (singular or plural, any ASCII case, outer whitespace ignored) by
+/// comparing it, minus one trailing 's', with each kind's `canonical_prefix`.
+fn normalize_kind(raw: &str) -> Option<ResourceKind> {
+    let name = raw.trim().to_ascii_lowercase();
+    let singular = name.strip_suffix('s').unwrap_or(name.as_str());
+    let kinds = [ResourceKind::Model, ResourceKind::Dataset, ResourceKind::Space];
+    kinds.iter().copied().find(|kind| kind.canonical_prefix() == singular)
+}
+
+/// Normalizes a '/'-separated identifier into a lowercase `owner/name` slug by handing its
+/// non-empty segments to `normalize_untyped_segments`.
+fn normalize_untyped_slug(raw: &str) -> Option<String> {
+    // Blank input needs no special case: it yields zero segments, which the
+    // segment normalizer answers with `None`.
+    let parts: Vec<&str> = raw.trim().split('/').filter(|part| !part.is_empty()).collect();
+    normalize_untyped_segments(&parts)
+}
+
+/// Builds a lowercase `owner/name` slug from path segments.
+///
+/// A leading kind segment (`model(s)`, `dataset(s)`, `space(s)`, any case) is dropped; the
+/// next segment is the owner and the rest, joined by '/', the name. Exactly one trailing
+/// `.git` is stripped: `trim_end_matches` would also eat repeated suffixes, turning
+/// `name.git.git` into `name`. Returns `None` when fewer than two segments remain or the
+/// owner or name is blank.
+fn normalize_untyped_segments(segments: &[&str]) -> Option<String> {
+    const KIND_SEGMENTS: [&str; 6] = ["models", "model", "datasets", "dataset", "spaces", "space"];
+    let leads_with_kind = segments.first().is_some_and(|first| {
+        KIND_SEGMENTS.contains(&first.trim().to_ascii_lowercase().as_str())
+    });
+    let parts = if leads_with_kind { &segments[1..] } else { segments };
+    // An owner plus at least one name segment must remain.
+    let (owner, name_parts) = parts.split_first()?;
+    if name_parts.is_empty() {
+        return None;
+    }
+    let owner = owner.trim();
+    let joined = name_parts.join("/");
+    let name = joined.strip_suffix(".git").unwrap_or(&joined).trim();
+    if owner.is_empty() || name.is_empty() {
+        return None;
+    }
+    Some(format!("{owner}/{name}").to_lowercase())
+}
+
+/// Extracts the `owner/name` slug of a `kind` resource from either a plain identifier or
+/// an `http(s)://` URL; `None` for an unparseable URL or a missing owner/name pair.
+fn parse_slug_for_kind(kind: ResourceKind, raw: &str) -> Option<String> {
+    let input = raw.trim();
+    let is_url = ["http://", "https://"].iter().any(|scheme| input.starts_with(*scheme));
+    if !is_url {
+        let pieces: Vec<&str> = input.split('/').filter(|piece| !piece.is_empty()).collect();
+        return parse_slug_segments(kind, &pieces);
+    }
+    let parsed = Url::parse(input).ok()?;
+    let pieces: Vec<&str> = match parsed.path_segments() {
+        Some(path) => path.filter(|piece| !piece.is_empty()).collect(),
+        None => Vec::new(),
+    };
+    parse_slug_segments(kind, &pieces)
+}
+
+/// Builds the `owner/name` slug of a `kind` resource from path segments, preserving case.
+///
+/// A leading segment naming `kind` itself (singular or plural, any ASCII case) is dropped.
+/// The next segment is the owner; the rest, re-joined with `/`, is the name, minus a single
+/// trailing `.git`. Returns `None` if fewer than two segments remain or either part is blank.
+fn parse_slug_segments(kind: ResourceKind, segments: &[&str]) -> Option<String> {
+    let prefixes = match kind {
+        ResourceKind::Model => ["models", "model"],
+        ResourceKind::Dataset => ["datasets", "dataset"],
+        ResourceKind::Space => ["spaces", "space"],
+    };
+    let has_kind_prefix = segments
+        .first()
+        .is_some_and(|first| prefixes.contains(&first.trim().to_ascii_lowercase().as_str()));
+    let parts = if has_kind_prefix { &segments[1..] } else { segments };
+    let (owner, name_segments) = parts.split_first()?;
+    if name_segments.is_empty() {
+        return None;
+    }
+    let joined = name_segments.join("/");
+    // Strip one `.git` only; `trim_end_matches` also turned `repo.git.git` into `repo`.
+    let name = joined.strip_suffix(".git").unwrap_or(&joined).trim();
+    let owner = owner.trim();
+    if owner.is_empty() || name.is_empty() {
+        return None;
+    }
+    Some(format!("{owner}/{name}"))
+}
+
+/// Returns the URL of the first `rel="next"` entry in an HTTP `Link` header value, if any.
+fn parse_next_link(value: &str) -> Option<Url> {
+    // Every link target is introduced by `<`, so split there instead of on `,`: a comma is
+    // legal inside a target URL, and splitting on it cut such links in half.
+    value.split('<').skip(1).find_map(|entry| {
+        let (target, params) = entry.split_once('>')?;
+        if !params.contains("rel=\"next\"") {
+            return None;
+        }
+        Url::parse(target.trim()).ok()
+    })
+}
+
+const BODY_SNIPPET_LIMIT: usize = 200; // max characters of a body/item snippet in messages
+
+/// Fetches every page of a Hugging Face listing, starting at `current_url`.
+///
+/// Each request carries the global User-Agent plus whatever `auth.apply` adds. Further
+/// pages are found through the `rel="next"` entry of the response's `Link` header. Array
+/// elements that do not deserialize into a `HuggingFaceItem` are logged and skipped.
+/// `context` describes what is being listed and only appears in log and error messages.
+///
+/// # Errors
+/// Fails on a transport error, on a non-success HTTP status (with a hint about
+/// `KF_HUGGINGFACE_TOKEN` for 401/403 when `auth` has no token) and when a page body is
+/// not a JSON array. Items from earlier pages are discarded in that case.
+async fn fetch_paginated(
+    client: &reqwest::Client,
+    mut current_url: Url,
+    auth: &AuthConfig,
+    context: &str,
+) -> Result<Vec<HuggingFaceItem>> {
+    let mut items = Vec::new();
+    loop {
+        let request =
+            client.get(current_url.clone()).header("User-Agent", GLOBAL_USER_AGENT.as_str());
+        let response = auth.apply(request).send().await?;
+        let status = response.status();
+        // Copy the header out first: reading the body below consumes `response`.
+        let link_header =
+            response.headers().get(LINK).and_then(|value| value.to_str().ok()).map(str::to_owned);
+        if !status.is_success() {
+            // Error bodies can be whole HTML pages; keep the message to a short snippet.
+            let text = response.text().await.unwrap_or_default();
+            let body = truncate_for_display(&text, BODY_SNIPPET_LIMIT);
+            let mut message = format!(
+                "Hugging Face API request failed while enumerating {context} ({status}): {body}"
+            );
+            if matches!(status, StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN)
+                && !auth.has_token()
+            {
+                message.push_str(
+                    "\nProvide a Hugging Face access token via the KF_HUGGINGFACE_TOKEN environment variable.",
+                );
+            }
+            return Err(anyhow!(message));
+        }
+        let body = response.bytes().await?;
+        let value: Value = serde_json::from_slice(&body).map_err(|err| {
+            let snippet = body_snippet(&body);
+            anyhow!(
+                "Failed to parse Hugging Face response while enumerating {context}: {err}. Body snippet: {snippet}"
+            )
+        })?;
+        let array = value.as_array().ok_or_else(|| {
+            let snippet = body_snippet(&body);
+            anyhow!(
+                "Unexpected Hugging Face response format while enumerating {context} (expected array). Body snippet: {snippet}"
+            )
+        })?;
+        for (index, element) in array.iter().enumerate() {
+            match serde_json::from_value::<HuggingFaceItem>(element.clone()) {
+                Ok(item) => items.push(item),
+                Err(err) => {
+                    let snippet = value_snippet(element);
+                    warn!(
+                        "Skipping Hugging Face item at index {index} while enumerating {context}: {err}. Item snippet: {snippet}"
+                    );
+                }
+            }
+        }
+        // Follow the `next` link, but stop if the server points back at the page that was
+        // just fetched: following it would repeat the same request forever.
+        match link_header.as_deref().and_then(parse_next_link) {
+            Some(next_url) if next_url != current_url => current_url = next_url,
+            _ => break,
+        }
+    }
+    Ok(items)
+}
+/// Lossily decodes `body` as UTF-8 and shortens it to `BODY_SNIPPET_LIMIT` characters.
+fn body_snippet(body: &[u8]) -> String {
+    truncate_for_display(&String::from_utf8_lossy(body), BODY_SNIPPET_LIMIT)
+}
+/// Renders `value` with `to_string` and shortens the result to `BODY_SNIPPET_LIMIT`
+/// characters for use in log messages.
+fn value_snippet(value: &Value) -> String {
+    truncate_for_display(&value.to_string(), BODY_SNIPPET_LIMIT)
+}
+/// Returns the first `limit` characters of `text`, followed by `…` if anything was cut off.
+fn truncate_for_display(text: &str, limit: usize) -> String {
+    // Find the byte offset of character `limit` without scanning the rest of a large body.
+    match text.char_indices().nth(limit) {
+        Some((cut, _)) => format!("{}…", &text[..cut]),
+        None => text.to_owned(),
+    }
+}
+
+/// Lists the models, datasets and spaces the API reports for author `owner`, in that order.
+///
+/// `label` names the owner in progress and log messages. Enumeration is best effort: a
+/// kind whose listing fails is logged and skipped, as is any item whose identifier does
+/// not parse as a slug. The only hard error is failing to join a kind's API path onto
+/// `base_url`.
+async fn fetch_resources_for_owner(
+    client: &reqwest::Client,
+    base_url: &Url,
+    owner: &str,
+    label: &str,
+    auth: &AuthConfig,
+    progress: Option<&ProgressBar>,
+) -> Result<Vec<ResourceRef>> {
+    let mut resources = Vec::new();
+    for kind in [ResourceKind::Model, ResourceKind::Dataset, ResourceKind::Space] {
+        if let Some(pb) = progress {
+            let plural = kind.display_name_plural();
+            pb.set_message(format!("Enumerating Hugging Face {label} {}", plural));
+        }
+        let mut url = base_url.join(kind.api_path())?;
+        // The pair serializer mutably borrows `url`; as a temporary it is released right here.
+        url.query_pairs_mut().append_pair("author", owner).append_pair("limit", "100");
+        let context = format!("{} for {label}", kind.display_name_plural());
+        let items = match fetch_paginated(client, url, auth, &context).await {
+            Ok(items) => items,
+            Err(err) => {
+                warn!(
+                    "Failed to enumerate Hugging Face {} for {label}: {err}",
+                    kind.display_name_plural()
+                );
+                continue;
+            }
+        };
+        for item in items {
+            let identifier = item.into_identifier();
+            match parse_slug_for_kind(kind, &identifier) {
+                Some(slug) => resources.push(ResourceRef::new(kind, slug)),
+                None => warn!(
+                    "Skipping Hugging Face {} with unexpected identifier '{}'",
+                    kind.display_name_singular(),
+                    identifier
+                ),
+            }
+        }
+    }
+    Ok(resources)
+}
+
+/// Appends the explicitly requested models, datasets and spaces from `specifiers` to
+/// `resources`, in that order. Identifiers that do not parse into an `owner/name` slug
+/// are logged and ignored.
+fn append_explicit_resources(specifiers: &RepoSpecifiers, resources: &mut Vec<ResourceRef>) {
+    for model in &specifiers.model {
+        match parse_slug_for_kind(ResourceKind::Model, model) {
+            Some(slug) => resources.push(ResourceRef::new(ResourceKind::Model, slug)),
+            None => warn!("Ignoring invalid Hugging Face model identifier '{model}'"),
+        }
+    }
+    for dataset in &specifiers.dataset {
+        match parse_slug_for_kind(ResourceKind::Dataset, dataset) {
+            Some(slug) => resources.push(ResourceRef::new(ResourceKind::Dataset, slug)),
+            None => warn!("Ignoring invalid Hugging Face dataset identifier '{dataset}'"),
+        }
+    }
+    for space in &specifiers.space {
+        match parse_slug_for_kind(ResourceKind::Space, space) {
+            Some(slug) => resources.push(ResourceRef::new(ResourceKind::Space, slug)),
+            None => warn!("Ignoring invalid Hugging Face space identifier '{space}'"),
+        }
+    }
+}
+
+/// Resolves `specifiers` into a sorted, de-duplicated list of Hugging Face git URLs.
+///
+/// Resources are collected from, in order: the listings of every entry in
+/// `specifiers.user`, the listings of every entry in `specifiers.organization`, and the
+/// explicitly named models, datasets and spaces. Anything matched by `specifiers.exclude`
+/// is dropped, and the rest is de-duplicated by canonical key before conversion to URLs.
+///
+/// Enumeration is best effort: a failure for one owner is logged and the others are still
+/// processed. `progress`, when given, only has its message updated.
+///
+/// # Errors
+/// Fails if the HTTP client cannot be built or the API base URL cannot be parsed.
+pub async fn enumerate_repo_urls(
+    specifiers: &RepoSpecifiers,
+    auth: &AuthConfig,
+    ignore_certs: bool,
+    progress: Option<&mut ProgressBar>,
+) -> Result<Vec<String>> {
+    let client = reqwest::Client::builder()
+        .timeout(Duration::from_secs(30))
+        .danger_accept_invalid_certs(ignore_certs)
+        .build()?;
+    let base_url = Url::parse("https://huggingface.co/api/")?;
+    let excludes = ExcludeSet::from_list(&specifiers.exclude);
+    // Shared access to the bar is all that is needed below.
+    let progress: Option<&ProgressBar> = progress.as_deref();
+    let mut collected = Vec::new();
+
+    for user in &specifiers.user {
+        let label = format!("user {user}");
+        if let Some(pb) = progress {
+            pb.set_message(format!("Enumerating Hugging Face {label}"));
+        }
+        let listed =
+            fetch_resources_for_owner(&client, &base_url, user, &label, auth, progress).await;
+        match listed {
+            Ok(mut resources) => collected.append(&mut resources),
+            Err(err) => warn!("Failed to enumerate Hugging Face user {user}: {err}"),
+        }
+    }
+
+    for org in &specifiers.organization {
+        let label = format!("organization {org}");
+        if let Some(pb) = progress {
+            pb.set_message(format!("Enumerating Hugging Face {label}"));
+        }
+        let listed =
+            fetch_resources_for_owner(&client, &base_url, org, &label, auth, progress).await;
+        match listed {
+            Ok(mut resources) => collected.append(&mut resources),
+            Err(err) => warn!("Failed to enumerate Hugging Face organization {org}: {err}"),
+        }
+    }
+
+    append_explicit_resources(specifiers, &mut collected);
+
+    // Exclusions first, then keep one URL per canonical key (the first resource seen wins).
+    let mut seen = HashSet::new();
+    let mut urls = Vec::new();
+    for resource in collected {
+        if excludes.should_exclude(resource.kind, &resource.slug) {
+            debug!(
+                "Skipping Hugging Face {} {} due to exclusion",
+                resource.kind.display_name_singular(),
+                resource.slug
+            );
+            continue;
+        }
+        if seen.insert(resource.canonical_key()) {
+            urls.push(resource.git_url());
+        }
+    }
+    urls.sort();
+    urls.dedup();
+    Ok(urls)
+}
+
+/// Prints the git URL of every repository selected by `specifiers` to stdout, one per
+/// line, showing a spinner while enumerating when `progress_enabled` is set.
+pub async fn list_repositories(
+    specifiers: &RepoSpecifiers,
+    auth: &AuthConfig,
+    ignore_certs: bool,
+    progress_enabled: bool,
+) -> Result<()> {
+    let mut progress = ProgressBar::hidden();
+    if progress_enabled {
+        let style = ProgressStyle::with_template("{spinner} {msg} [{elapsed_precise}]")
+            .expect("progress bar style template should compile");
+        progress = ProgressBar::new_spinner()
+            .with_style(style)
+            .with_message("Enumerating Hugging Face repositories");
+        progress.enable_steady_tick(Duration::from_millis(500));
+    }
+    let outcome = enumerate_repo_urls(specifiers, auth, ignore_certs, Some(&mut progress)).await;
+    // Clear the spinner first so it neither interleaves with the output nor outlives an error.
+    progress.finish_and_clear();
+    for url in outcome? {
+        println!("{url}");
+    }
+    Ok(())
+}
+/// Always returns `None`; no wiki URL is derived from a Hugging Face repository URL.
+pub fn wiki_url(_repo_url: &GitUrl) -> Option<GitUrl> {
+    None
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// A bare `owner/name` identifier is returned as is.
+    #[test]
+    fn parse_model_slug_from_plain() {
+        let slug = parse_slug_for_kind(ResourceKind::Model, "user/model");
+        assert_eq!(slug.as_deref(), Some("user/model"));
+    }
+
+    /// The matching kind prefix and a trailing `.git` are both removed.
+    #[test]
+    fn parse_dataset_slug_with_prefix() {
+        let slug = parse_slug_for_kind(ResourceKind::Dataset, "datasets/user/data.git");
+        assert_eq!(slug.as_deref(), Some("user/data"));
+    }
+
+    /// For a URL, the slug comes from the path once the kind prefix is removed.
+    #[test]
+    fn parse_space_slug_from_url() {
+        let url = "https://huggingface.co/spaces/user/demo";
+        assert_eq!(parse_slug_for_kind(ResourceKind::Space, url).as_deref(), Some("user/demo"));
+    }
+
+    /// A typed (`model:`) entry and an untyped entry both exclude their target; others stay.
+    #[test]
+    fn exclude_set_matches_typed_and_untyped() {
+        let entries: Vec<String> = vec!["model:user/model".into(), "datasets/user/data".into()];
+        let excludes = ExcludeSet::from_list(&entries);
+        assert!(excludes.should_exclude(ResourceKind::Model, "user/model"));
+        assert!(excludes.should_exclude(ResourceKind::Dataset, "user/data"));
+        assert!(!excludes.should_exclude(ResourceKind::Space, "user/space"));
+    }
+
+    /// The target of a `rel="next"` entry is extracted from a `Link` header value.
+    #[test]
+    fn parse_link_header() {
+        let expected = "https://huggingface.co/api/models?cursor=abc";
+        let url = parse_next_link(&format!("<{expected}>; rel=\"next\"")).expect("next link");
+        assert_eq!(url.as_str(), expected);
+    }
+
+    /// Text longer than the limit is cut and marked with an ellipsis; shorter text is kept.
+    #[test]
+    fn truncate_for_display_adds_ellipsis() {
+        assert_eq!(truncate_for_display("abcdef", 3), "abc…");
+        assert_eq!(truncate_for_display("abc", 5), "abc");
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 9fc8106..1736bd1 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -13,6 +13,7 @@ pub mod defaults;
 pub mod entropy;
 pub mod finding_data;
 pub mod findings_store;
+pub mod gcs;
 pub mod git_binary;
 pub mod git_commit_metadata;
 pub mod git_metadata_graph;
@@ -21,6 +22,7 @@ pub mod git_url;
 pub mod gitea;
 pub mod github;
 pub mod gitlab;
+pub mod huggingface;
 pub mod inline_ignore;
 pub mod jira;
 pub mod liquid_filters;
diff --git a/src/main.rs b/src/main.rs
index c0f6f9a..d6c36c7 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -52,7 +52,7 @@ use kingfisher::{
     },
     findings_store,
     findings_store::FindingsStore,
-    gitea, github,
+    gitea, github, huggingface,
     rule_loader::RuleLoader,
     rules_database::RulesDatabase,
     scanner::{load_and_record_rules, run_scan},
@@ -75,6 +75,7 @@ use crate::cli::commands::{
     bitbucket::{BitbucketAuthArgs, BitbucketCommand, BitbucketRepoType, BitbucketReposCommand},
     gitea::{GiteaCommand, GiteaRepoType, GiteaReposCommand},
     gitlab::{GitLabCommand, GitLabRepoType, GitLabReposCommand},
+    huggingface::{HuggingFaceCommand, HuggingFaceReposCommand},
 };
 
 fn main() -> anyhow::Result<()> {
@@ -93,6 +94,7 @@ fn main() -> anyhow::Result<()> {
         Command::Bitbucket(_) => num_cpus::get(), // Default for Bitbucket commands
         Command::Gitea(_) => num_cpus::get(), // Default for Gitea commands
         Command::Azure(_) => num_cpus::get(), // Default for Azure commands
+        Command::HuggingFace(_) => num_cpus::get(), // Default for Hugging Face commands
         Command::Rules(_) => num_cpus::get(), // Default for Rules commands
     };
 
@@ -327,6 +329,28 @@ async fn async_main(args: CommandLineArgs) -> Result<()> {
                         }
                     },
                 },
+                Command::HuggingFace(hf_args) => match hf_args.command {
+                    HuggingFaceCommand::Repos(repos_command) => match repos_command {
+                        HuggingFaceReposCommand::List(list_args) => {
+                            let specifiers = huggingface::RepoSpecifiers {
+                                user: list_args.repo_specifiers.user.clone(),
+                                organization: list_args.repo_specifiers.organization.clone(),
+                                model: list_args.repo_specifiers.model.clone(),
+                                dataset: list_args.repo_specifiers.dataset.clone(),
+                                space: list_args.repo_specifiers.space.clone(),
+                                exclude: list_args.repo_specifiers.exclude.clone(),
+                            };
+                            let auth = huggingface::AuthConfig::from_env();
+                            huggingface::list_repositories(
+                                &specifiers,
+                                &auth,
+                                global_args.ignore_certs,
+                                global_args.use_progress(),
+                            )
+                            .await?;
+                        }
+                    },
+                },
                 Command::SelfUpdate => {
                     anyhow::bail!("SelfUpdate command should not reach this branch")
                 }
@@ -367,6 +391,13 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs {
             gitlab_repo_type: GitLabRepoType::All,
             gitlab_include_subgroups: false,
 
+            huggingface_user: Vec::new(),
+            huggingface_organization: Vec::new(),
+            huggingface_model: Vec::new(),
+            huggingface_dataset: Vec::new(),
+            huggingface_space: Vec::new(),
+            huggingface_exclude: Vec::new(),
+
             gitea_user: Vec::new(),
             gitea_organization: Vec::new(),
             gitea_exclude: Vec::new(),
@@ -400,6 +431,9 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs {
             s3_prefix: None,
             role_arn: None,
             aws_local_profile: None,
+            gcs_bucket: None,
+            gcs_prefix: None,
+            gcs_service_account: None,
             // Slack query
             slack_query: None,
             slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
@@ -436,6 +470,8 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs {
         manage_baseline: false,
         skip_regex: Vec::new(),
         skip_word: Vec::new(),
+        skip_aws_account: Vec::new(),
+        skip_aws_account_file: None,
         output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
         no_base64: false,
         no_inline_ignore: false,
diff --git a/src/reporter.rs b/src/reporter.rs
index 52a491c..127a9ad 100644
--- a/src/reporter.rs
+++ b/src/reporter.rs
@@ -703,7 +703,7 @@ mod tests {
         git_commit_metadata::CommitMetadata,
         location::{Location, OffsetSpan, SourcePoint, SourceSpan},
         matcher::{SerializableCapture, SerializableCaptures},
-        origin::OriginSet,
+        origin::{Origin, OriginSet},
         rules::rule::{Confidence, Rule, RuleSyntax},
     };
     use gix::{date::Time, ObjectId};
@@ -711,79 +711,8 @@ mod tests {
     use std::path::PathBuf;
     use tempfile::tempdir;
 
-    #[test]
-    fn build_finding_record_uses_git_blob_path() {
-        let temp = tempdir().unwrap();
-        let datastore =
-            Arc::new(Mutex::new(findings_store::FindingsStore::new(temp.path().to_path_buf())));
-        let reporter = DetailsReporter { datastore, styles: Styles::new(false), only_valid: false };
-
-        let repo_path = Arc::new(PathBuf::from("/tmp/repo"));
-        let commit_metadata = Arc::new(CommitMetadata {
-            commit_id: ObjectId::from_hex(b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").unwrap(),
-            committer_name: "Alice".into(),
-            committer_email: "alice@example.com".into(),
-            committer_timestamp: Time::new(0, 0),
-        });
-        let blob_path = "path/in/history.txt".to_string();
-        let origin = OriginSet::new(
-            Origin::from_git_repo_with_first_commit(repo_path, commit_metadata, blob_path.clone()),
-            vec![],
-        );
-
-        let rule = Arc::new(Rule::new(RuleSyntax {
-            name: "Test Rule".into(),
-            id: "test.rule".into(),
-            pattern: ".*".into(),
-            min_entropy: 0.0,
-            confidence: Confidence::Medium,
-            visible: true,
-            examples: vec![],
-            negative_examples: vec![],
-            references: vec![],
-            validation: None,
-            depends_on_rule: vec![],
-        }));
-
-        let blob_id = BlobId::new(b"blob-data");
-        let report_match = ReportMatch {
-            origin,
-            blob_metadata: BlobMetadata {
-                id: blob_id,
-                num_bytes: 42,
-                mime_essence: None,
-                language: Some("Unknown".into()),
-            },
-            m: Match {
-                location: Location {
-                    offset_span: OffsetSpan { start: 0, end: 10 },
-                    source_span: SourceSpan {
-                        start: SourcePoint { line: 19, column: 0 },
-                        end: SourcePoint { line: 19, column: 10 },
-                    },
-                },
-                groups: SerializableCaptures {
-                    captures: SmallVec::<[SerializableCapture; 2]>::new(),
-                },
-                blob_id,
-                finding_fingerprint: 123,
-                rule: Arc::clone(&rule),
-                validation_response_body: "Bad credentials".into(),
-                validation_response_status: 401,
-                validation_success: false,
-                calculated_entropy: 5.29,
-                visible: true,
-                is_base64: false,
-            },
-            comment: None,
-            match_confidence: Confidence::Medium,
-            visible: true,
-            validation_response_body: "Bad credentials".into(),
-            validation_response_status: 401,
-            validation_success: false,
-        };
-
-        let scan_args = ScanArgs {
+    fn sample_scan_args() -> ScanArgs {
+        ScanArgs {
             num_jobs: 1,
             rules: RuleSpecifierArgs::default(),
             input_specifier_args: InputSpecifierArgs {
@@ -802,6 +731,12 @@ mod tests {
                 gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(),
                 gitlab_repo_type: GitLabRepoType::All,
                 gitlab_include_subgroups: false,
+                huggingface_user: Vec::new(),
+                huggingface_organization: Vec::new(),
+                huggingface_model: Vec::new(),
+                huggingface_dataset: Vec::new(),
+                huggingface_space: Vec::new(),
+                huggingface_exclude: Vec::new(),
                 gitea_user: Vec::new(),
                 gitea_organization: Vec::new(),
                 gitea_exclude: Vec::new(),
@@ -833,6 +768,9 @@ mod tests {
                 s3_prefix: None,
                 role_arn: None,
                 aws_local_profile: None,
+                gcs_bucket: None,
+                gcs_prefix: None,
+                gcs_service_account: None,
                 docker_image: Vec::new(),
                 git_clone: GitCloneMode::Bare,
                 git_history: GitHistoryMode::Full,
@@ -864,9 +802,98 @@ mod tests {
             manage_baseline: false,
             skip_regex: Vec::new(),
             skip_word: Vec::new(),
+            skip_aws_account: Vec::new(),
+            skip_aws_account_file: None,
             no_inline_ignore: false,
+        }
+    }
+    /// Fixture: a git-origin `ReportMatch` with the given validation result, plus its blob path.
+    fn sample_report_match(
+        validation_body: &str,
+        validation_status: u16,
+        validation_success: bool,
+    ) -> (ReportMatch, String) {
+        let repo_path = Arc::new(PathBuf::from("/tmp/repo"));
+        let commit_metadata = Arc::new(CommitMetadata {
+            commit_id: ObjectId::from_hex(b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").unwrap(),
+            committer_name: "Alice".into(),
+            committer_email: "alice@example.com".into(),
+            committer_timestamp: Time::new(0, 0),
+        });
+        let blob_path = "path/in/history.txt".to_string();
+        let origin = OriginSet::new(
+            Origin::from_git_repo_with_first_commit(repo_path, commit_metadata, blob_path.clone()),
+            vec![],
+        );
+
+        let rule = Arc::new(Rule::new(RuleSyntax {
+            name: "Test Rule".into(),
+            id: "test.rule".into(),
+            pattern: ".*".into(),
+            min_entropy: 0.0,
+            confidence: Confidence::Medium,
+            visible: true,
+            examples: vec![],
+            negative_examples: vec![],
+            references: vec![],
+            validation: None,
+            depends_on_rule: vec![],
+        }));
+
+        let blob_id = BlobId::new(b"blob-data");
+        let validation_body_owned = validation_body.to_string();
+        let report_match = ReportMatch {
+            origin,
+            blob_metadata: BlobMetadata {
+                id: blob_id,
+                num_bytes: 42,
+                mime_essence: None,
+                language: Some("Unknown".into()),
+            },
+            m: Match {
+                location: Location {
+                    offset_span: OffsetSpan { start: 0, end: 10 },
+                    source_span: SourceSpan {
+                        start: SourcePoint { line: 19, column: 0 },
+                        end: SourcePoint { line: 19, column: 10 },
+                    },
+                },
+                groups: SerializableCaptures {
+                    captures: SmallVec::<[SerializableCapture; 2]>::new(),
+                },
+                blob_id,
+                finding_fingerprint: 123,
+                rule: Arc::clone(&rule),
+                validation_response_body: validation_body_owned.clone(),
+                validation_response_status: validation_status,
+                validation_success,
+                calculated_entropy: 5.29,
+                visible: true,
+                is_base64: false,
+            },
+            comment: None,
+            match_confidence: Confidence::Medium,
+            visible: true,
+            validation_response_body: validation_body_owned, // same outcome as on `m` above
+            validation_response_status: validation_status,
+            validation_success,
         };
 
+        (report_match, blob_path)
+    }
+
+    #[test]
+    fn build_finding_record_uses_git_blob_path() {
+        let temp = tempdir().unwrap();
+        let datastore =
+            Arc::new(Mutex::new(findings_store::FindingsStore::new(temp.path().to_path_buf())));
+        let reporter = DetailsReporter { datastore, styles: Styles::new(false), only_valid: false };
+
+        let (report_match, blob_path) =
+            sample_report_match("Bad credentials", StatusCode::UNAUTHORIZED.as_u16(), false);
+
+        let scan_args = sample_scan_args();
+
         let record = reporter.build_finding_record(&report_match, &scan_args);
         assert_eq!(record.finding.path, blob_path);
         let git_file_path = record
@@ -880,6 +907,28 @@ mod tests {
         assert_eq!(git_file_path, "path/in/history.txt");
     }
 
+    /// A match whose AWS validation was skipped by a skip-list entry (recorded as HTTP 100
+    /// Continue, not successful) is reported as "Not Attempted" with the reason preserved.
+    #[test]
+    fn skip_list_matches_surface_skip_reason() {
+        let skip_reason =
+            "(skip list entry) AWS validation not attempted for account 111122223333.";
+        let store_dir = tempdir().unwrap();
+        let store = findings_store::FindingsStore::new(store_dir.path().to_path_buf());
+        let reporter = DetailsReporter {
+            datastore: Arc::new(Mutex::new(store)),
+            styles: Styles::new(false),
+            only_valid: false,
+        };
+        let (report_match, _) =
+            sample_report_match(skip_reason, StatusCode::CONTINUE.as_u16(), false);
+        let scan_args = sample_scan_args();
+        let record = reporter.build_finding_record(&report_match, &scan_args);
+        let validation = &record.finding.validation;
+        assert_eq!(validation.status, "Not Attempted");
+        assert_eq!(validation.response, skip_reason);
+    }
+
     use super::build_git_urls;
 
     #[test]
diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs
index 02dc20c..6435c71 100644
--- a/src/reporter/json_format.rs
+++ b/src/reporter/json_format.rs
@@ -93,6 +93,14 @@ mod tests {
                 gitlab_repo_type: GitLabRepoType::All,
                 gitlab_include_subgroups: false,
 
+                // Hugging Face
+                huggingface_user: Vec::new(),
+                huggingface_organization: Vec::new(),
+                huggingface_model: Vec::new(),
+                huggingface_dataset: Vec::new(),
+                huggingface_space: Vec::new(),
+                huggingface_exclude: Vec::new(),
+
                 // Gitea
                 gitea_user: Vec::new(),
                 gitea_organization: Vec::new(),
@@ -132,6 +140,9 @@ mod tests {
                 s3_prefix: None,
                 role_arn: None,
                 aws_local_profile: None,
+                gcs_bucket: None,
+                gcs_prefix: None,
+                gcs_service_account: None,
 
                 docker_image: Vec::new(),
                 // clone / history options
@@ -163,6 +174,8 @@ mod tests {
             manage_baseline: false,
             skip_regex: Vec::new(),
             skip_word: Vec::new(),
+            skip_aws_account: Vec::new(),
+            skip_aws_account_file: None,
             no_base64: false,
             no_inline_ignore: false,
         }
diff --git a/src/reporter/pretty_format.rs b/src/reporter/pretty_format.rs
index 086648c..ea31fc5 100644
--- a/src/reporter/pretty_format.rs
+++ b/src/reporter/pretty_format.rs
@@ -115,7 +115,7 @@ impl<'a> Display for PrettyFindingRecord<'a> {
         } else {
             writeln!(f, " |Validation....: {}", finding.validation.status)?;
         }
-        if finding.validation.status != "Not Attempted" {
+        if !finding.validation.response.is_empty() {
             writeln!(f, " |__Response....: {}", style_fn(&finding.validation.response))?;
         }
         writeln!(f, " |Language......: {}", finding.language)?;
diff --git a/src/scanner/mod.rs b/src/scanner/mod.rs
index a6e0b6a..6d98b34 100644
--- a/src/scanner/mod.rs
+++ b/src/scanner/mod.rs
@@ -3,7 +3,7 @@ pub(crate) use docker::save_docker_images;
 pub(crate) use enumerate::enumerate_filesystem_inputs;
 pub(crate) use repos::{
     clone_or_update_git_repos, enumerate_azure_repos, enumerate_bitbucket_repos,
-    enumerate_github_repos,
+    enumerate_github_repos, enumerate_huggingface_repos,
 };
 pub use runner::{load_and_record_rules, run_async_scan, run_scan};
 pub(crate) use validation::run_secret_validation;
diff --git a/src/scanner/repos.rs b/src/scanner/repos.rs
index fb6577a..da00dcc 100644
--- a/src/scanner/repos.rs
+++ b/src/scanner/repos.rs
@@ -17,10 +17,10 @@ use crate::{
         commands::{github::GitCloneMode, github::GitHistoryMode, scan},
         global,
     },
-    confluence, findings_store,
+    confluence, findings_store, gcs,
     git_binary::{CloneMode, Git},
     git_url::GitUrl,
-    gitea, github, gitlab, jira,
+    gitea, github, gitlab, huggingface, jira,
     matcher::{Match, Matcher, MatcherStats},
     origin::{Origin, OriginSet},
     rules_database::RulesDatabase,
@@ -305,6 +305,69 @@ pub async fn enumerate_gitea_repos(
     Ok(repo_urls)
 }
 
+/// Collects the Git URLs of the Hugging Face repositories selected by the scan arguments.
+///
+/// Explicit `git_url` inputs are always included; the result is sorted and deduplicated.
+pub async fn enumerate_huggingface_repos(
+    args: &scan::ScanArgs,
+    global_args: &global::GlobalArgs,
+) -> Result<Vec<GitUrl>> {
+    let inputs = &args.input_specifier_args;
+    let repo_specifiers = huggingface::RepoSpecifiers {
+        user: inputs.huggingface_user.clone(),
+        organization: inputs.huggingface_organization.clone(),
+        model: inputs.huggingface_model.clone(),
+        dataset: inputs.huggingface_dataset.clone(),
+        space: inputs.huggingface_space.clone(),
+        exclude: inputs.huggingface_exclude.clone(),
+    };
+
+    let mut repo_urls = inputs.git_url.clone();
+    if !repo_specifiers.is_empty() {
+        let mut spinner = if !global_args.use_progress() {
+            ProgressBar::hidden()
+        } else {
+            let template = "{spinner} {msg} {human_len} [{elapsed_precise}]";
+            let style = ProgressStyle::with_template(template)
+                .expect("progress bar style template should compile");
+            let bar = ProgressBar::new_spinner()
+                .with_style(style)
+                .with_message("Enumerating Hugging Face repositories...");
+            bar.enable_steady_tick(Duration::from_millis(500));
+            bar
+        };
+
+        let auth = huggingface::AuthConfig::from_env();
+        let listed = huggingface::enumerate_repo_urls(
+            &repo_specifiers,
+            &auth,
+            global_args.ignore_certs,
+            Some(&mut spinner),
+        )
+        .await
+        .context("Failed to enumerate Hugging Face repositories")?;
+
+        // Keep every URL that parses; log the others without failing the enumeration.
+        let mut num_found: u64 = 0;
+        for repo_string in listed {
+            match GitUrl::from_str(&repo_string) {
+                Err(e) => spinner.suspend(|| {
+                    error!("Failed to parse repo URL from {repo_string}: {e}");
+                }),
+                Ok(parsed) => {
+                    repo_urls.push(parsed);
+                    num_found += 1;
+                }
+            }
+        }
+        let summary = format!("Found {} repositories from Hugging Face", HumanCount(num_found));
+        spinner.finish_with_message(summary);
+    }
+    repo_urls.sort();
+    repo_urls.dedup();
+    Ok(repo_urls)
+}
+
 pub async fn enumerate_bitbucket_repos(
     args: &scan::ScanArgs,
     global_args: &global::GlobalArgs,
@@ -678,3 +741,81 @@ pub async fn fetch_s3_objects(
 
     Ok(())
 }
+
+/// Passes every `(key, bytes)` pair delivered by `gcs::visit_bucket_objects` through
+/// `BlobProcessor::run` and records the resulting matches in `datastore`.
+pub async fn fetch_gcs_objects(
+    args: &scan::ScanArgs,
+    datastore: &Arc<Mutex<findings_store::FindingsStore>>,
+    rules_db: &RulesDatabase,
+    matcher_stats: &Mutex<MatcherStats>,
+    enable_profiling: bool,
+    shared_profiler: Arc<crate::rule_profiling::ConcurrentRuleProfiler>,
+    progress_enabled: bool,
+) -> Result<()> {
+    let Some(bucket) = args.input_specifier_args.gcs_bucket.as_deref() else {
+        return Ok(()); // no GCS bucket requested: nothing to do
+    };
+    let prefix = args.input_specifier_args.gcs_prefix.as_deref();
+    let service_account = args.input_specifier_args.gcs_service_account.as_deref();
+
+    let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
+    let seen_blobs = BlobIdMap::new();
+    let matcher = Matcher::new(
+        rules_db,
+        scanner_pool,
+        &seen_blobs,
+        Some(matcher_stats),
+        enable_profiling,
+        Some(shared_profiler.clone()),
+        &args.extra_ignore_comments,
+        args.no_inline_ignore,
+    )?;
+    let mut processor = BlobProcessor { matcher };
+
+    let progress = if progress_enabled {
+        let style =
+            ProgressStyle::with_template("{spinner} {msg} ({pos} objects) [{elapsed_precise}]")
+                .expect("progress bar style template should compile");
+        let pb = ProgressBar::new_spinner().with_style(style).with_message("Fetching GCS objects");
+        pb.enable_steady_tick(Duration::from_millis(500));
+        pb
+    } else {
+        ProgressBar::hidden()
+    };
+    // The `move` closure below takes ownership of this handle and of the owned bucket name.
+    let pb = progress.clone();
+    let bucket_name = bucket.to_string();
+
+    gcs::visit_bucket_objects(bucket, prefix, service_account, move |key, bytes| {
+        let origin = OriginSet::new(
+            Origin::from_extended(serde_json::json!({
+                "path": format!("gs://{}/{}", bucket_name, key)
+            })),
+            Vec::new(),
+        );
+        let blob = crate::blob::Blob::from_bytes(bytes);
+        // Nothing is recorded for this object when `run` returns `None`.
+        if let Some((origin, blob_md, scored_matches)) =
+            processor.run(origin, blob, args.no_dedup, args.redact, args.no_base64)?
+        {
+            let origin_arc = Arc::new(origin);
+            let blob_arc = Arc::new(blob_md);
+            let mut batch = Vec::with_capacity(scored_matches.len());
+            for (_score, m) in scored_matches {
+                batch.push((origin_arc.clone(), blob_arc.clone(), m));
+            }
+
+            let added = datastore.lock().unwrap().record(batch, !args.no_dedup);
+            debug!("Added {} new GCS blobs", added);
+        }
+        pb.inc(1); // reached for every object that did not error, with or without matches
+        Ok(())
+    })
+    .await?;
+
+    let total = progress.position();
+    progress.finish_with_message(format!("Fetched {} GCS objects", total));
+
+    Ok(())
+}
diff --git a/src/scanner/runner.rs b/src/scanner/runner.rs
index 9de4a00..25dae47 100644
--- a/src/scanner/runner.rs
+++ b/src/scanner/runner.rs
@@ -1,4 +1,7 @@
-use std::sync::{Arc, Mutex};
+use std::{
+    fs,
+    sync::{Arc, Mutex},
+};
 
 use anyhow::{bail, Context, Result};
 use crossbeam_skiplist::SkipMap;
@@ -21,10 +24,11 @@ use crate::{
     safe_list,
     scanner::{
         clone_or_update_git_repos, enumerate_azure_repos, enumerate_bitbucket_repos,
-        enumerate_filesystem_inputs, enumerate_github_repos,
+        enumerate_filesystem_inputs, enumerate_github_repos, enumerate_huggingface_repos,
         repos::{
             enumerate_gitea_repos, enumerate_gitlab_repos, fetch_confluence_pages,
-            fetch_git_host_artifacts, fetch_jira_issues, fetch_s3_objects, fetch_slack_messages,
+            fetch_gcs_objects, fetch_git_host_artifacts, fetch_jira_issues, fetch_s3_objects,
+            fetch_slack_messages,
         },
         run_secret_validation, save_docker_images,
         summary::print_scan_summary,
@@ -74,12 +78,14 @@ pub async fn run_async_scan(
     let mut repo_urls = enumerate_github_repos(args, global_args).await?;
     let gitlab_repo_urls = enumerate_gitlab_repos(args, global_args).await?;
     let gitea_repo_urls = enumerate_gitea_repos(args, global_args).await?;
+    let huggingface_repo_urls = enumerate_huggingface_repos(args, global_args).await?;
     let bitbucket_repo_urls = enumerate_bitbucket_repos(args, global_args).await?;
     let azure_repo_urls = enumerate_azure_repos(args, global_args).await?;
 
     // Combine repository URLs
     repo_urls.extend(gitlab_repo_urls);
     repo_urls.extend(gitea_repo_urls);
+    repo_urls.extend(huggingface_repo_urls);
     repo_urls.extend(bitbucket_repo_urls);
     repo_urls.extend(azure_repo_urls);
     repo_urls.sort();
@@ -182,8 +188,20 @@ pub async fn run_async_scan(
     )
     .await?;
 
-    let has_s3 = args.input_specifier_args.s3_bucket.is_some();
-    if input_roots.is_empty() && !has_s3 {
+    fetch_gcs_objects(
+        args,
+        &datastore,
+        rules_db,
+        &matcher_stats,
+        enable_profiling,
+        Arc::clone(&shared_profiler),
+        progress_enabled,
+    )
+    .await?;
+
+    let has_remote_objects = args.input_specifier_args.s3_bucket.is_some()
+        || args.input_specifier_args.gcs_bucket.is_some();
+    if input_roots.is_empty() && !has_remote_objects {
         bail!("No inputs to scan");
     }
 
@@ -231,6 +249,26 @@ pub async fn run_async_scan(
         crate::baseline::apply_baseline(&mut ds, &path, args.manage_baseline, &input_roots)?;
     }
 
+    let mut skip_aws_accounts = args.skip_aws_account.clone();
+
+    if let Some(path) = args.skip_aws_account_file.as_ref() {
+        let contents = fs::read_to_string(path).with_context(|| {
+            format!("Failed to read --skip-aws-account-file {}", path.display())
+        })?;
+
+        for line in contents.lines() {
+            let content = line.split('#').next().unwrap_or("");
+            for value in content.split(|c: char| c.is_ascii_whitespace() || c == ',' || c == ';') {
+                let trimmed = value.trim();
+                if !trimmed.is_empty() {
+                    skip_aws_accounts.push(trimmed.to_string());
+                }
+            }
+        }
+    }
+
+    crate::validation::set_skip_aws_account_ids(skip_aws_accounts);
+
     // If validation is enabled, run it as a second phase
     if !args.no_validate {
         info!("Starting secret validation phase...");
diff --git a/src/validation.rs b/src/validation.rs
index 81f97dd..b371bf0 100644
--- a/src/validation.rs
+++ b/src/validation.rs
@@ -95,6 +95,14 @@ pub fn init_validation_caches() {
     aws::set_aws_validation_concurrency(15);
 }
 
+/// Replaces the user-supplied AWS account IDs whose credentials are never validated.
+///
+/// Forwards `ids` unchanged to `aws::set_aws_skip_account_ids`, which normalizes each
+/// entry and always keeps the built-in IDs.
+pub fn set_skip_aws_account_ids<I: IntoIterator<Item = S>, S: Into<String>>(ids: I) {
+    aws::set_aws_skip_account_ids(ids)
+}
+
 #[derive(Clone)]
 pub struct CachedResponse {
     pub body: String,
@@ -781,6 +789,26 @@ async fn timed_validate_single_match<'a>(
                 }
             }
 
+            if let Some(account_id) = aws::should_skip_aws_validation(&akid) {
+                m.validation_success = false;
+                m.validation_response_body = format!(
+                    "(skip list entry) AWS validation not attempted for account {}.",
+                    account_id
+                );
+                m.validation_response_status = StatusCode::CONTINUE;
+                cache.insert(
+                    cache_key,
+                    CachedResponse {
+                        body: m.validation_response_body.clone(),
+                        status: m.validation_response_status,
+                        is_valid: m.validation_success,
+                        timestamp: Instant::now(),
+                    },
+                );
+                commit_and_return(m);
+                return;
+            }
+
             if let Err(e) = aws::validate_aws_credentials_input(&akid, &secret) {
                 m.validation_success = false;
                 m.validation_response_body = format!("Invalid AWS credentials ({}): {}", akid, e);
diff --git a/src/validation/aws.rs b/src/validation/aws.rs
index 7e1073d..156b32c 100644
--- a/src/validation/aws.rs
+++ b/src/validation/aws.rs
@@ -1,7 +1,7 @@
-use std::time::Duration;
+use std::{collections::HashSet, sync::RwLock, time::Duration};
 
 use anyhow::{anyhow, Result};
-use aws_config::{retry::RetryConfig, BehaviorVersion};
+use aws_config::{retry::RetryConfig, BehaviorVersion, SdkConfig};
 use aws_credential_types::Credentials;
 use aws_sdk_sts::{
     config::Builder as StsConfigBuilder, error::SdkError,
@@ -26,8 +26,9 @@ use http::{
     header::{HeaderValue, USER_AGENT},
     StatusCode,
 };
-use once_cell::sync::OnceCell;
+use once_cell::sync::{Lazy, OnceCell};
 use rand::{rng, Rng};
+use regex::Regex;
 use tokio::{
     sync::Semaphore,
     time::{sleep, timeout},
@@ -36,6 +37,57 @@ use tokio::{
 use crate::validation::GLOBAL_USER_AGENT;
 
 static AWS_VALIDATION_SEMAPHORE: OnceCell<Semaphore> = OnceCell::new();
+/// AWS account IDs that are always on the validation skip list.
+const BUILTIN_SKIP_ACCOUNT_IDS: &[&str] = &[
+    "052310077262",
+    "171436882533",
+    "528757803018",
+    "534261010715",
+    "595918472158",
+    "729780141977",
+    "893192397702",
+    "992382622183",
+];
+
+/// Active skip list; starts as the built-in IDs and is rebuilt by `set_aws_skip_account_ids`.
+static AWS_SKIP_ACCOUNT_IDS: Lazy<RwLock<HashSet<String>>> = Lazy::new(|| {
+    RwLock::new(BUILTIN_SKIP_ACCOUNT_IDS.iter().copied().map(String::from).collect())
+});
+
+/// Builds an HTTP client using rustls (AWS-LC) that takes its proxy config from the environment.
+fn build_http_client() -> SharedHttpClient {
+    HttpClientBuilder::new().build_with_connector_fn(|settings, runtime| {
+        let crypto = tls::rustls_provider::CryptoMode::AwsLc;
+        let mut conn = ConnectorBuilder::default().tls_provider(tls::Provider::Rustls(crypto));
+        conn.set_connector_settings(settings.cloned());
+        if let Some(components) = runtime {
+            conn.set_sleep_impl(components.sleep_impl());
+        }
+        conn.set_proxy_config(Some(ProxyConfig::from_env()));
+        conn.build()
+    })
+}
+
+/// Loads the base `SdkConfig` for `credentials`, pinned to region `us-east-1`, using the
+/// client from `build_http_client` and adaptive retries capped at three attempts.
+async fn build_base_config(credentials: Credentials) -> SdkConfig {
+    let loader = aws_config::defaults(BehaviorVersion::latest())
+        .region(Region::new("us-east-1"))
+        .credentials_provider(credentials)
+        .http_client(build_http_client())
+        .retry_config(RetryConfig::adaptive().with_max_attempts(3));
+    loader.load().await
+}
+
+/// Extracts an AWS account ID from `input`, which may be a bare ID or text such as an ARN.
+/// Only a run of exactly 12 ASCII digits qualifies: longer runs are rejected instead of being
+/// truncated, so a mistyped ID is reported as invalid rather than matching another account.
+fn extract_account_id(input: &str) -> Option<String> {
+    static ACCOUNT_ID_RE: Lazy<Regex> =
+        Lazy::new(|| Regex::new(r"(?:^|[^0-9])([0-9]{12})(?:[^0-9]|$)").expect("valid regex"));
+    let caps = ACCOUNT_ID_RE.captures(input.trim())?;
+    Some(caps[1].to_string())
+}
 
 /// Set the maximum number of concurrent AWS validations. Call before first use.
 pub fn set_aws_validation_concurrency(max: usize) {
@@ -46,6 +98,46 @@ fn aws_validation_semaphore() -> &'static Semaphore {
     AWS_VALIDATION_SEMAPHORE.get_or_init(|| Semaphore::new(15))
 }
 
+/// Rebuilds the AWS validation skip list from the built-in IDs plus `ids`.
+///
+/// Blank entries are ignored; entries without an extractable account ID are logged and dropped.
+pub fn set_aws_skip_account_ids<I, S>(ids: I)
+where
+    I: IntoIterator<Item = S>,
+    S: Into<String>,
+{
+    // A poisoned lock is recovered: the set is rebuilt from scratch below anyway.
+    let mut skip_list =
+        AWS_SKIP_ACCOUNT_IDS.write().unwrap_or_else(|poisoned| poisoned.into_inner());
+    skip_list.clear();
+    skip_list.extend(BUILTIN_SKIP_ACCOUNT_IDS.iter().map(|id| id.to_string()));
+    for value in ids.into_iter().map(Into::<String>::into) {
+        if value.trim().is_empty() {
+            continue;
+        }
+        match extract_account_id(&value) {
+            Some(normalized) => {
+                skip_list.insert(normalized);
+            }
+            None => tracing::warn!("Ignoring invalid AWS account ID in skip list: {value}"),
+        }
+    }
+}
+
+/// Returns the owning account ID when `access_key_id` belongs to a skip-listed AWS account.
+///
+/// Returns `None` when the key cannot be decoded to an account or the account is not listed.
+pub fn should_skip_aws_validation(access_key_id: &str) -> Option<String> {
+    // Recover from a poisoned lock exactly as `set_aws_skip_account_ids` does; bailing out
+    // here would silently disable the skip list and validate keys that must be left alone.
+    let skip_list = AWS_SKIP_ACCOUNT_IDS.read().unwrap_or_else(|poisoned| poisoned.into_inner());
+    if skip_list.is_empty() {
+        return None;
+    }
+
+    aws_key_to_account_number(access_key_id).ok().filter(|account| skip_list.contains(account))
+}
+
 #[derive(Debug)]
 struct UaInterceptor;
 
@@ -132,29 +224,7 @@ pub async fn validate_aws_credentials(
         None,     // expiry
         "static", // provider name
     );
-    // Create HTTP client that respects proxy settings from the environment
-    let http_client: SharedHttpClient =
-        HttpClientBuilder::new().build_with_connector_fn(|settings, runtime_components| {
-            let mut conn_builder = ConnectorBuilder::default()
-                .tls_provider(tls::Provider::Rustls(tls::rustls_provider::CryptoMode::AwsLc));
-
-            conn_builder.set_connector_settings(settings.cloned());
-            if let Some(components) = runtime_components {
-                conn_builder.set_sleep_impl(components.sleep_impl());
-            }
-            conn_builder.set_proxy_config(Some(ProxyConfig::from_env()));
-            conn_builder.build()
-        });
-
-    // Create AWS config with adaptive retries
-    let retry_config = RetryConfig::adaptive().with_max_attempts(3);
-    let config = aws_config::defaults(BehaviorVersion::latest())
-        .region(Region::new("us-east-1"))
-        .credentials_provider(credentials)
-        .http_client(http_client)
-        .retry_config(retry_config)
-        .load()
-        .await;
+    let config = build_base_config(credentials).await;
 
     // Create STS client
     let sts_config = StsConfigBuilder::from(&config).interceptor(UaInterceptor).build();
@@ -230,3 +300,74 @@ pub fn aws_key_to_account_number(aws_key_id: &str) -> Result<String, Box<dyn std
     // Return the account number formatted as a 12-digit string
     Ok(format!("{:012}", account_num))
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use once_cell::sync::Lazy;
+    use std::sync::Mutex;
+
+    // Serializes the tests: they all mutate the process-wide `AWS_SKIP_ACCOUNT_IDS`.
+    static TEST_GUARD: Lazy<Mutex<()>> = Lazy::new(|| Mutex::new(()));
+
+    #[test]
+    fn skip_account_list_normalizes_inputs() {
+        let _lock = TEST_GUARD.lock().unwrap();
+        // Use IDs that are NOT built in, so the assertions fail if normalization breaks.
+        set_aws_skip_account_ids([
+            " 123456789012 ",
+            "arn:aws:iam::210987654321:role/demo",
+            "invalid",
+        ]);
+        let guard = AWS_SKIP_ACCOUNT_IDS.read().unwrap();
+        assert!(guard.contains("123456789012"));
+        assert!(guard.contains("210987654321"));
+        assert_eq!(guard.len(), BUILTIN_SKIP_ACCOUNT_IDS.len() + 2);
+        drop(guard);
+
+        set_aws_skip_account_ids(Vec::<String>::new());
+    }
+
+    #[test]
+    fn should_skip_when_account_matches() {
+        let _lock = TEST_GUARD.lock().unwrap();
+        // NOTE: 534261010715 is also a built-in entry, so passing it here changes nothing.
+        set_aws_skip_account_ids(["534261010715"]);
+        assert_eq!(
+            should_skip_aws_validation("AKIAXYZDQCEN4B6JSJQI"),
+            Some("534261010715".to_string())
+        );
+
+        set_aws_skip_account_ids(Vec::<String>::new());
+    }
+
+    #[test]
+    fn builtin_canary_accounts_are_preseeded() {
+        let _lock = TEST_GUARD.lock().unwrap();
+        // With no user-supplied IDs, only the built-in entries can produce a match.
+        set_aws_skip_account_ids(Vec::<String>::new());
+        assert_eq!(
+            should_skip_aws_validation("AKIAXYZDQCEN4B6JSJQI"),
+            Some("534261010715".to_string())
+        );
+
+        set_aws_skip_account_ids(Vec::<String>::new());
+    }
+
+    #[test]
+    fn duplicate_accounts_are_deduplicated() {
+        let _lock = TEST_GUARD.lock().unwrap();
+        // Three spellings of one non-built-in account must add exactly one set entry.
+        set_aws_skip_account_ids([
+            "123456789012",
+            "arn:aws:iam::123456789012:user/canarytokens",
+            " 123456789012 ",
+        ]);
+        let guard = AWS_SKIP_ACCOUNT_IDS.read().unwrap();
+        assert!(guard.contains("123456789012"));
+        assert_eq!(guard.len(), BUILTIN_SKIP_ACCOUNT_IDS.len() + 1);
+        drop(guard);
+
+        set_aws_skip_account_ids(Vec::<String>::new());
+    }
+}
diff --git a/src/validation/utils.rs b/src/validation/utils.rs
index ea0b020..9e8422d 100644
--- a/src/validation/utils.rs
+++ b/src/validation/utils.rs
@@ -33,31 +33,77 @@ pub fn find_closest_variable(
     target_variable_name: &str,
     search_variable_name: &str,
 ) -> Option<String> {
-    // Find positions of the target variable with the target value
+    // Collect the positions of the target variable for the provided value so we can
+    // compare relative offsets with candidate variables.
     let mut target_positions = Vec::new();
     for (name, value, start, end) in captures {
         if name == target_variable_name && value == target_value {
             target_positions.push((*start, *end));
         }
     }
+
     if target_positions.is_empty() {
         return None;
     }
-    // For each target position, find the closest search variable
-    let mut closest_distance = usize::MAX;
-    let mut closest_value: Option<String> = None;
-    for (_target_start, target_end) in target_positions {
-        for (name, value, start, _) in captures {
-            if name == search_variable_name {
-                let distance = (*start as isize - target_end as isize).abs() as usize;
-                if distance < closest_distance {
-                    closest_distance = distance;
-                    closest_value = Some(value.clone());
+
+    // Prefer candidates that appear before the target value (same logical block), but
+    // fall back to overlapping values and then to those that appear after the target
+    // value when no better match exists. This avoids pairing with the next block when
+    // multiple credentials are close together in the same file.
+    let mut best_before: Option<(usize, String)> = None;
+    let mut best_overlap: Option<(usize, String)> = None;
+    let mut best_after: Option<(usize, String)> = None;
+
+    for (target_start, target_end) in target_positions.iter().copied() {
+        for (name, value, start, end) in captures {
+            if name != search_variable_name {
+                continue;
+            }
+
+            if *end <= target_start {
+                // Candidate is before the target; choose the one closest to the target start.
+                let distance = target_start - *end;
+                match &mut best_before {
+                    Some((best_distance, best_value)) if distance < *best_distance => {
+                        *best_distance = distance;
+                        *best_value = value.clone();
+                    }
+                    None => {
+                        best_before = Some((distance, value.clone()));
+                    }
+                    _ => {}
+                }
+            } else if *start >= target_end {
+                // Candidate is after the target; choose the one closest to the target end.
+                let distance = *start - target_end;
+                match &mut best_after {
+                    Some((best_distance, best_value)) if distance < *best_distance => {
+                        *best_distance = distance;
+                        *best_value = value.clone();
+                    }
+                    None => {
+                        best_after = Some((distance, value.clone()));
+                    }
+                    _ => {}
+                }
+            } else {
+                // Candidate overlaps the target – treat as an exact match.
+                let distance = 0usize;
+                match &mut best_overlap {
+                    Some((best_distance, best_value)) if distance < *best_distance => {
+                        *best_distance = distance;
+                        *best_value = value.clone();
+                    }
+                    None => {
+                        best_overlap = Some((distance, value.clone()));
+                    }
+                    _ => {}
                 }
             }
         }
     }
-    closest_value
+
+    best_before.or(best_overlap).or(best_after).map(|(_, value)| value)
 }
 
 pub async fn check_url_resolvable(url: &Url) -> Result<(), Box<dyn std::error::Error>> {
@@ -147,4 +193,31 @@ mod tests {
             ]
         );
     }
+
+    /// A candidate located before the target is chosen over one located after it.
+    #[test]
+    fn prefers_closest_preceding_variable() {
+        let cap = |n: &str, v: &str, s: usize, e: usize| (n.to_string(), v.to_string(), s, e);
+        let captures = vec![
+            cap("TOKEN", "secret", 75, 115),
+            cap("AKID", "preceding", 30, 50),
+            cap("AKID", "following", 180, 200),
+        ];
+
+        let found = find_closest_variable(&captures, &"secret".to_string(), "TOKEN", "AKID");
+        assert_eq!(found, Some("preceding".to_string()));
+    }
+
+    /// With no candidate before the target, the one after it is returned.
+    #[test]
+    fn falls_back_to_following_when_no_preceding() {
+        let cap = |n: &str, v: &str, s: usize, e: usize| (n.to_string(), v.to_string(), s, e);
+        let captures = vec![
+            cap("TOKEN", "secret", 10, 50),
+            cap("AKID", "after", 60, 80),
+        ];
+
+        let found = find_closest_variable(&captures, &"secret".to_string(), "TOKEN", "AKID");
+        assert_eq!(found, Some("after".to_string()));
+    }
 }
diff --git a/tests/int_allowlist.rs b/tests/int_allowlist.rs
index 3947548..54379a3 100644
--- a/tests/int_allowlist.rs
+++ b/tests/int_allowlist.rs
@@ -72,6 +72,12 @@ fn run_skiplist(skip_regex: Vec<String>, skip_skipword: Vec<String>) -> Result<u
             gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(),
             gitlab_repo_type: GitLabRepoType::Owner,
             gitlab_include_subgroups: false,
+            huggingface_user: Vec::new(),
+            huggingface_organization: Vec::new(),
+            huggingface_model: Vec::new(),
+            huggingface_dataset: Vec::new(),
+            huggingface_space: Vec::new(),
+            huggingface_exclude: Vec::new(),
             gitea_user: Vec::new(),
             gitea_organization: Vec::new(),
             gitea_exclude: Vec::new(),
@@ -103,6 +109,9 @@ fn run_skiplist(skip_regex: Vec<String>, skip_skipword: Vec<String>) -> Result<u
             s3_prefix: None,
             role_arn: None,
             aws_local_profile: None,
+            gcs_bucket: None,
+            gcs_prefix: None,
+            gcs_service_account: None,
             docker_image: Vec::new(),
             git_clone: GitCloneMode::Bare,
             git_history: GitHistoryMode::Full,
@@ -133,6 +142,8 @@ fn run_skiplist(skip_regex: Vec<String>, skip_skipword: Vec<String>) -> Result<u
         manage_baseline: false,
         skip_regex: skip_regex,
         skip_word: skip_skipword,
+        skip_aws_account: Vec::new(),
+        skip_aws_account_file: None,
         no_base64: false,
         no_inline_ignore: false,
     };
diff --git a/tests/int_bitbucket.rs b/tests/int_bitbucket.rs
index 7b6ed52..eb69b9c 100644
--- a/tests/int_bitbucket.rs
+++ b/tests/int_bitbucket.rs
@@ -68,6 +68,13 @@ fn test_bitbucket_remote_scan() -> Result<()> {
             gitlab_repo_type: GitLabRepoType::Owner,
             gitlab_include_subgroups: false,
 
+            huggingface_user: Vec::new(),
+            huggingface_organization: Vec::new(),
+            huggingface_model: Vec::new(),
+            huggingface_dataset: Vec::new(),
+            huggingface_space: Vec::new(),
+            huggingface_exclude: Vec::new(),
+
             gitea_user: Vec::new(),
             gitea_organization: Vec::new(),
             gitea_exclude: Vec::new(),
@@ -102,6 +109,9 @@ fn test_bitbucket_remote_scan() -> Result<()> {
             s3_prefix: None,
             role_arn: None,
             aws_local_profile: None,
+            gcs_bucket: None,
+            gcs_prefix: None,
+            gcs_service_account: None,
             docker_image: Vec::new(),
             git_clone: GitCloneMode::Bare,
             git_history: GitHistoryMode::Full,
@@ -131,6 +141,8 @@ fn test_bitbucket_remote_scan() -> Result<()> {
         manage_baseline: false,
         skip_regex: Vec::new(),
         skip_word: Vec::new(),
+        skip_aws_account: Vec::new(),
+        skip_aws_account_file: None,
         no_base64: false,
         extra_ignore_comments: Vec::new(),
         no_inline_ignore: false,
diff --git a/tests/int_dedup.rs b/tests/int_dedup.rs
index e86c589..2f1998c 100644
--- a/tests/int_dedup.rs
+++ b/tests/int_dedup.rs
@@ -85,6 +85,13 @@ rules:
             gitlab_repo_type: GitLabRepoType::Owner,
             gitlab_include_subgroups: false,
 
+            huggingface_user: Vec::new(),
+            huggingface_organization: Vec::new(),
+            huggingface_model: Vec::new(),
+            huggingface_dataset: Vec::new(),
+            huggingface_space: Vec::new(),
+            huggingface_exclude: Vec::new(),
+
             gitea_user: Vec::new(),
             gitea_organization: Vec::new(),
             gitea_exclude: Vec::new(),
@@ -120,6 +127,9 @@ rules:
             s3_prefix: None,
             role_arn: None,
             aws_local_profile: None,
+            gcs_bucket: None,
+            gcs_prefix: None,
+            gcs_service_account: None,
             // Docker image scanning
             docker_image: Vec::new(),
             // git clone / history options
@@ -151,6 +161,8 @@ rules:
         manage_baseline: false,
         skip_regex: Vec::new(),
         skip_word: Vec::new(),
+        skip_aws_account: Vec::new(),
+        skip_aws_account_file: None,
         no_base64: false,
         extra_ignore_comments: Vec::new(),
         no_inline_ignore: false,
diff --git a/tests/int_github.rs b/tests/int_github.rs
index dcf936a..82a0f78 100644
--- a/tests/int_github.rs
+++ b/tests/int_github.rs
@@ -72,6 +72,13 @@ fn test_github_remote_scan() -> Result<()> {
             gitlab_repo_type: GitLabRepoType::Owner,
             gitlab_include_subgroups: false,
 
+            huggingface_user: Vec::new(),
+            huggingface_organization: Vec::new(),
+            huggingface_model: Vec::new(),
+            huggingface_dataset: Vec::new(),
+            huggingface_space: Vec::new(),
+            huggingface_exclude: Vec::new(),
+
             gitea_user: Vec::new(),
             gitea_organization: Vec::new(),
             gitea_exclude: Vec::new(),
@@ -107,6 +114,9 @@ fn test_github_remote_scan() -> Result<()> {
             s3_prefix: None,
             role_arn: None,
             aws_local_profile: None,
+            gcs_bucket: None,
+            gcs_prefix: None,
+            gcs_service_account: None,
             // Docker image scanning
             docker_image: Vec::new(),
             // git clone / history options
@@ -138,6 +148,8 @@ fn test_github_remote_scan() -> Result<()> {
         manage_baseline: false,
         skip_regex: Vec::new(),
         skip_word: Vec::new(),
+        skip_aws_account: Vec::new(),
+        skip_aws_account_file: None,
         no_base64: false,
         extra_ignore_comments: Vec::new(),
         no_inline_ignore: false,
diff --git a/tests/int_gitlab.rs b/tests/int_gitlab.rs
index 7c2e0b9..745f323 100644
--- a/tests/int_gitlab.rs
+++ b/tests/int_gitlab.rs
@@ -71,6 +71,13 @@ fn test_gitlab_remote_scan() -> Result<()> {
             gitlab_repo_type: GitLabRepoType::Owner,
             gitlab_include_subgroups: false,
 
+            huggingface_user: Vec::new(),
+            huggingface_organization: Vec::new(),
+            huggingface_model: Vec::new(),
+            huggingface_dataset: Vec::new(),
+            huggingface_space: Vec::new(),
+            huggingface_exclude: Vec::new(),
+
             gitea_user: Vec::new(),
             gitea_organization: Vec::new(),
             gitea_exclude: Vec::new(),
@@ -106,6 +113,9 @@ fn test_gitlab_remote_scan() -> Result<()> {
             s3_prefix: None,
             role_arn: None,
             aws_local_profile: None,
+            gcs_bucket: None,
+            gcs_prefix: None,
+            gcs_service_account: None,
             // Docker image scanning
             docker_image: Vec::new(),
             git_clone: GitCloneMode::Bare,
@@ -137,6 +147,8 @@ fn test_gitlab_remote_scan() -> Result<()> {
         manage_baseline: false,
         skip_regex: Vec::new(),
         skip_word: Vec::new(),
+        skip_aws_account: Vec::new(),
+        skip_aws_account_file: None,
         no_base64: false,
         no_inline_ignore: false,
     };
@@ -210,6 +222,13 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> {
             gitlab_repo_type: GitLabRepoType::Owner,
             gitlab_include_subgroups: false,
 
+            huggingface_user: Vec::new(),
+            huggingface_organization: Vec::new(),
+            huggingface_model: Vec::new(),
+            huggingface_dataset: Vec::new(),
+            huggingface_space: Vec::new(),
+            huggingface_exclude: Vec::new(),
+
             gitea_user: Vec::new(),
             gitea_organization: Vec::new(),
             gitea_exclude: Vec::new(),
@@ -252,6 +271,9 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> {
             scan_nested_repos: true,
             since_commit: None,
             branch: None,
+            gcs_bucket: None,
+            gcs_prefix: None,
+            gcs_service_account: None,
         },
         content_filtering_args: ContentFilteringArgs {
             max_file_size_mb: 25.0,
@@ -273,6 +295,8 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> {
         manage_baseline: false,
         skip_regex: Vec::new(),
         skip_word: Vec::new(),
+        skip_aws_account: Vec::new(),
+        skip_aws_account_file: None,
         no_base64: false,
         extra_ignore_comments: Vec::new(),
         no_inline_ignore: false,
diff --git a/tests/int_redact.rs b/tests/int_redact.rs
index 7d7accb..a570cfe 100644
--- a/tests/int_redact.rs
+++ b/tests/int_redact.rs
@@ -55,6 +55,12 @@ async fn test_redact_hashes_finding_values() -> Result<()> {
             gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(),
             gitlab_repo_type: GitLabRepoType::Owner,
             gitlab_include_subgroups: false,
+            huggingface_user: Vec::new(),
+            huggingface_organization: Vec::new(),
+            huggingface_model: Vec::new(),
+            huggingface_dataset: Vec::new(),
+            huggingface_space: Vec::new(),
+            huggingface_exclude: Vec::new(),
             gitea_user: Vec::new(),
             gitea_organization: Vec::new(),
             gitea_exclude: Vec::new(),
@@ -86,6 +92,9 @@ async fn test_redact_hashes_finding_values() -> Result<()> {
             s3_prefix: None,
             role_arn: None,
             aws_local_profile: None,
+            gcs_bucket: None,
+            gcs_prefix: None,
+            gcs_service_account: None,
             docker_image: Vec::new(),
             git_clone: GitCloneMode::Bare,
             git_history: GitHistoryMode::Full,
@@ -115,6 +124,8 @@ async fn test_redact_hashes_finding_values() -> Result<()> {
         manage_baseline: false,
         skip_regex: Vec::new(),
         skip_word: Vec::new(),
+        skip_aws_account: Vec::new(),
+        skip_aws_account_file: None,
         no_base64: false,
         extra_ignore_comments: Vec::new(),
         no_inline_ignore: false,
diff --git a/tests/int_slack.rs b/tests/int_slack.rs
index 5f2d01a..dd9df68 100644
--- a/tests/int_slack.rs
+++ b/tests/int_slack.rs
@@ -62,6 +62,13 @@ impl TestContext {
                 gitlab_repo_type: GitLabRepoType::Owner,
                 gitlab_include_subgroups: false,
 
+                huggingface_user: Vec::new(),
+                huggingface_organization: Vec::new(),
+                huggingface_model: Vec::new(),
+                huggingface_dataset: Vec::new(),
+                huggingface_space: Vec::new(),
+                huggingface_exclude: Vec::new(),
+
                 gitea_user: Vec::new(),
                 gitea_organization: Vec::new(),
                 gitea_exclude: Vec::new(),
@@ -92,6 +99,9 @@ impl TestContext {
                 s3_prefix: None,
                 role_arn: None,
                 aws_local_profile: None,
+                gcs_bucket: None,
+                gcs_prefix: None,
+                gcs_service_account: None,
                 max_results: 10,
                 docker_image: Vec::new(),
                 git_clone: GitCloneMode::Bare,
@@ -123,6 +133,8 @@ impl TestContext {
             manage_baseline: false,
             skip_regex: Vec::new(),
             skip_word: Vec::new(),
+            skip_aws_account: Vec::new(),
+            skip_aws_account_file: None,
             no_base64: false,
             no_inline_ignore: false,
         };
@@ -186,6 +198,13 @@ async fn test_scan_slack_messages() -> Result<()> {
             gitlab_repo_type: GitLabRepoType::Owner,
             gitlab_include_subgroups: false,
 
+            huggingface_user: Vec::new(),
+            huggingface_organization: Vec::new(),
+            huggingface_model: Vec::new(),
+            huggingface_dataset: Vec::new(),
+            huggingface_space: Vec::new(),
+            huggingface_exclude: Vec::new(),
+
             gitea_user: Vec::new(),
             gitea_organization: Vec::new(),
             gitea_exclude: Vec::new(),
@@ -218,6 +237,9 @@ async fn test_scan_slack_messages() -> Result<()> {
             s3_prefix: None,
             role_arn: None,
             aws_local_profile: None,
+            gcs_bucket: None,
+            gcs_prefix: None,
+            gcs_service_account: None,
             docker_image: Vec::new(),
             git_clone: GitCloneMode::Bare,
             git_history: GitHistoryMode::Full,
@@ -247,6 +269,8 @@ async fn test_scan_slack_messages() -> Result<()> {
         manage_baseline: false,
         skip_regex: Vec::new(),
         skip_word: Vec::new(),
+        skip_aws_account: Vec::new(),
+        skip_aws_account_file: None,
         no_base64: false,
         extra_ignore_comments: Vec::new(),
         no_inline_ignore: false,
diff --git a/tests/int_validation_cache.rs b/tests/int_validation_cache.rs
index 74cf6e4..ba81576 100644
--- a/tests/int_validation_cache.rs
+++ b/tests/int_validation_cache.rs
@@ -128,6 +128,13 @@ async fn test_validation_cache_and_depvars() -> Result<()> {
             gitlab_repo_type: GitLabRepoType::Owner,
             gitlab_include_subgroups: false,
 
+            huggingface_user: Vec::new(),
+            huggingface_organization: Vec::new(),
+            huggingface_model: Vec::new(),
+            huggingface_dataset: Vec::new(),
+            huggingface_space: Vec::new(),
+            huggingface_exclude: Vec::new(),
+
             gitea_user: Vec::new(),
             gitea_organization: Vec::new(),
             gitea_exclude: Vec::new(),
@@ -163,6 +170,9 @@ async fn test_validation_cache_and_depvars() -> Result<()> {
             s3_prefix: None,
             role_arn: None,
             aws_local_profile: None,
+            gcs_bucket: None,
+            gcs_prefix: None,
+            gcs_service_account: None,
             // Docker image scanning
             docker_image: Vec::new(),
             // git clone / history options
@@ -194,6 +204,8 @@ async fn test_validation_cache_and_depvars() -> Result<()> {
         manage_baseline: false,
         skip_regex: Vec::new(),
         skip_word: Vec::new(),
+        skip_aws_account: Vec::new(),
+        skip_aws_account_file: None,
         no_base64: false,
         extra_ignore_comments: Vec::new(),
         no_inline_ignore: false,
diff --git a/tests/int_vulnerable_files.rs b/tests/int_vulnerable_files.rs
index 464377a..5dec6b5 100644
--- a/tests/int_vulnerable_files.rs
+++ b/tests/int_vulnerable_files.rs
@@ -71,6 +71,13 @@ impl TestContext {
                 gitlab_repo_type: GitLabRepoType::Owner,
                 gitlab_include_subgroups: false,
 
+                huggingface_user: Vec::new(),
+                huggingface_organization: Vec::new(),
+                huggingface_model: Vec::new(),
+                huggingface_dataset: Vec::new(),
+                huggingface_space: Vec::new(),
+                huggingface_exclude: Vec::new(),
+
                 gitea_user: Vec::new(),
                 gitea_organization: Vec::new(),
                 gitea_exclude: Vec::new(),
@@ -106,6 +113,9 @@ impl TestContext {
                 s3_prefix: None,
                 role_arn: None,
                 aws_local_profile: None,
+                gcs_bucket: None,
+                gcs_prefix: None,
+                gcs_service_account: None,
                 // Docker image scanning
                 docker_image: Vec::new(),
                 // git clone / history options
@@ -137,6 +147,8 @@ impl TestContext {
             manage_baseline: false,
             skip_regex: Vec::new(),
             skip_word: Vec::new(),
+            skip_aws_account: Vec::new(),
+            skip_aws_account_file: None,
             no_base64: false,
             extra_ignore_comments: Vec::new(),
             no_inline_ignore: false,
@@ -183,6 +195,13 @@ impl TestContext {
                 gitlab_repo_type: GitLabRepoType::Owner,
                 gitlab_include_subgroups: false,
 
+                huggingface_user: Vec::new(),
+                huggingface_organization: Vec::new(),
+                huggingface_model: Vec::new(),
+                huggingface_dataset: Vec::new(),
+                huggingface_space: Vec::new(),
+                huggingface_exclude: Vec::new(),
+
                 gitea_user: Vec::new(),
                 gitea_organization: Vec::new(),
                 gitea_exclude: Vec::new(),
@@ -228,6 +247,10 @@ impl TestContext {
                 scan_nested_repos: true,
                 since_commit: None,
                 branch: None,
+
+                gcs_bucket: None,
+                gcs_prefix: None,
+                gcs_service_account: None,
             },
             extra_ignore_comments: Vec::new(),
             content_filtering_args: ContentFilteringArgs {
@@ -250,6 +273,8 @@ impl TestContext {
             manage_baseline: false,
             skip_regex: Vec::new(),
             skip_word: Vec::new(),
+            skip_aws_account: Vec::new(),
+            skip_aws_account_file: None,
             no_base64: false,
             no_inline_ignore: false,
         };