diff --git a/CHANGELOG.md b/CHANGELOG.md index f15b21d..d385891 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ All notable changes to this project will be documented in this file. ## [1.22.0] - Added rules for Google Gemini AI, Cohere, Stability.ai, Replicate, Runway, Clarifai +- Upgraded dependencies ## [1.21.0] - Improved Azure Storage rule diff --git a/Cargo.toml b/Cargo.toml index e7650ac..4bf7e3d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,7 +30,7 @@ clap = { version = "4.5", features = [ anyhow = "1.0" bstr = { version = "1.12", features = ["serde"] } fixedbitset = "0.5" -gix = { version = "0.72", features = ["max-performance", "serde", "blocking-network-client"] } +gix = { version = "0.73", features = ["max-performance", "serde", "blocking-network-client"] } ignore = "0.4" petgraph = "0.6" roaring = "0.10" @@ -102,9 +102,9 @@ octorust = "0.9.0" reqwest-middleware = "0.4.2" tracing-subscriber = {version = "0.3.19", features = ["env-filter"] } tracing-core = "0.1.34" -tree-sitter = "0.24.7" -tree-sitter-bash = "0.23.3" -tree-sitter-c = "0.23.4" +tree-sitter = "0.25.8" +tree-sitter-bash = "0.25.0" +tree-sitter-c = "0.24.1" tree-sitter-c-sharp = "0.23.1" tree-sitter-cpp = "0.23.4" tree-sitter-css = "0.23.2" @@ -115,10 +115,10 @@ tree-sitter-javascript = "0.23.1" tree-sitter-php = "0.23.11" tree-sitter-python = "0.23.6" tree-sitter-ruby = "0.23.1" -tree-sitter-rust = "0.23.3" +tree-sitter-rust = "0.24.0" tree-sitter-toml-ng = "0.7.0" tree-sitter-typescript = "0.23.2" -tree-sitter-yaml = "0.6.1" +tree-sitter-yaml = "0.7.1" streaming-iterator = "0.1.9" tree-sitter-regex = "0.24.3" content_inspector = "0.2.4" @@ -140,7 +140,7 @@ sha2 = "0.10.9" strum_macros = "0.27.1" humantime = "2.2.0" path-dedot = "3.1.1" -quick-xml = {version = "0.37.5", features = ["serde","serialize"] } +quick-xml = {version = "0.38.0", features = ["serde","serialize"] } rustls = "0.23.29" tokio-postgres-rustls = "0.13.0" rustls-native-certs = "0.8.1" @@ -148,7 +148,7 @@ 
predicates = "3.1.3" assert_cmd = "2.0.17" proptest = "1.7.0" color-backtrace = "0.7.0" -gitlab = "0.1711.0" +gitlab = "0.1801.0" mimalloc = {version = "0.1.47", features = ["override"]} thread_local = "1.1.9" crc32fast = "1.5.0" diff --git a/Makefile b/Makefile index 41f74ba..fa97341 100644 --- a/Makefile +++ b/Makefile @@ -368,10 +368,10 @@ all: linux darwin dockerfile: # Build for the host architecture (default) - docker build -t kingfisher:latest . + docker build -f docker/Dockerfile -t kingfisher:latest . # Cross‑build for arm64 from an x64 machine - docker buildx build --platform linux/arm64 -t kingfisher:arm64 . + docker buildx build -f docker/Dockerfile --platform linux/arm64 -t kingfisher:arm64 . list-archives: @echo -e "\n=== Built archives ===" diff --git a/README.md b/README.md index 7e3e5cf..cfd4b24 100644 --- a/README.md +++ b/README.md @@ -58,27 +58,23 @@ make darwin-all # builds both x64 and arm64 make all # builds for every OS and architecture supported ``` -### Run Kingfisher in Docker +# 🔐 Detection Rules at a Glance +Kingfisher ships with hundreds of rules that cover everything from classic cloud keys to the latest LLM-API secrets. +Below is an overview; click any category to see the exact rule IDs. 
-Run the dockerized Kingfisher container: -```bash -# GitHub Container Registry -docker run --rm ghcr.io/mongodb/kingfisher:latest --version +| Category | What we catch | +|----------|---------------| +| **AI / LLM APIs** | OpenAI, Anthropic, Google Gemini, Cohere, Mistral, Stability AI, Replicate, xAI (Grok), and more +| **Cloud Providers** | AWS, Azure, GCP, Alibaba Cloud, DigitalOcean, IBM Cloud, Cloudflare, and more +| **Dev & CI/CD** | GitHub/GitLab tokens, CircleCI, TravisCI, TeamCity, Docker Hub, npm & PyPI publish tokens, and more +| **Messaging & Comms** | Slack, Discord, Microsoft Teams, Twilio, Mailgun/SendGrid/Mailchimp, and more +| **Databases & Data Ops** | MongoDB Atlas, PlanetScale, Postgres DSNs, Grafana Cloud, Datadog, Dynatrace, and more +| **Payments & Billing** | Stripe, PayPal, Square, GoCardless, and more +| **Security & DevSecOps** | Snyk, Dependency-Track, CodeClimate, Codacy, OpsGenie, PagerDuty, and more +| **Misc. SaaS & Tools** | 1Password, Adobe, Atlassian/Jira, Asana, Netlify, Baremetrics, and more -# Scan the current working directory -# (mounts your code at /src and scans it) -docker run --rm -v "$PWD":/src ghcr.io/mongodb/kingfisher:latest scan /src - -# Scan while providing a GitHub token -docker run --rm -e KF_GITHUB_TOKEN=ghp_… -v "$PWD":/proj ghcr.io/mongodb/kingfisher:latest scan --git-url https://github.com/org/private_repo.git - -# Scan and output as json -docker run --rm -v "$PWD":/proj ghcr.io/mongodb/kingfisher:latest scan /proj --format json --output findings.json -``` - - -# Write Custom Rules! +## Write Custom Rules! Kingfisher ships with hundreds of rules with HTTP and service‑specific validation checks (AWS, Azure, GCP, etc.) to confirm if a detected string is a live credential. 
@@ -388,33 +384,15 @@ By integrating Kingfisher into your development lifecycle, you can: ## The Risk of Leaked Secrets -Embedding credentials in code repositories is a pervasive, ever‑present risk that leads directly to data breaches: +Real breaches show how one exposed key can snowball into a full-scale incident: -1. **Uber (2016)** +- **Uber (2016):** GitHub-hosted AWS key let attackers access data on 57 M riders and 600 k drivers. [[BBC](https://www.bbc.com/news/technology-42075306)] [[Ars](https://arstechnica.com/tech-policy/2017/11/report-uber-paid-hackers-100000-to-keep-2016-data-breach-quiet/)] +- **AWS engineer (2020):** Pushed log files with root credentials to GitHub. [[Register](https://www.theregister.com/2020/01/23/aws_engineer_credentials_github/)] [[UpGuard](https://www.upguard.com/breaches/identity-and-access-misstep-how-an-amazon-engineer-exposed-credentials-and-more)] +- **Infosys (2023):** Full-admin AWS key left in a public PyPI package for a year. [[Stack](https://www.thestack.technology/infosys-leak-aws-key-exposed-on-pypi/)] [[Blog](https://tomforb.es/blog/infosys-leak/)] +- **Microsoft (2023):** Azure SAS token in an AI repo exposed 38 TB of internal data. [[Wiz](https://www.wiz.io/blog/38-terabytes-of-private-data-accidentally-exposed-by-microsoft-ai-researchers)] [[TechCrunch](https://techcrunch.com/2023/09/18/microsoft-ai-researchers-accidentally-exposed-terabytes-of-internal-sensitive-data/)] +- **GitHub (2023):** RSA SSH host key briefly went public; company rotated it. [[GitHub](https://github.blog/news-insights/company-news/we-updated-our-rsa-ssh-host-key/)] - - _Incident_: Attackers stole GitHub credentials, retrieved an AWS key from a developer’s private repo, and accessed data on 57 million riders and 600 000 drivers. - - _Sources_: [BBC News](https://www.bbc.com/news/technology-42075306), [Ars Technica](https://arstechnica.com/tech-policy/2017/11/report-uber-paid-hackers-100000-to-keep-2016-data-breach-quiet/) - -2. 
**AWS** - - - _Incident_: An AWS engineer accidentally published log files and CloudFormation templates containing AWS key pairs (including “rootkey.csv”) to a public GitHub repo. - - _Sources_: [The Register](https://www.theregister.com/2020/01/23/aws_engineer_credentials_github/), [UpGuard](https://www.upguard.com/breaches/identity-and-access-misstep-how-an-amazon-engineer-exposed-credentials-and-more) - -3. **Infosys** - - - _Incident_: Infosys published an internal PyPI package embedding a FullAdminAccess AWS key for a Johns Hopkins data bucket; the key remained active for over a year. - - _Sources_: [The Stack](https://www.thestack.technology/infosys-leak-aws-key-exposed-on-pypi/), [Tom Forbes Blog](https://tomforb.es/blog/infosys-leak/) - -4. **Microsoft** - - - _Incident_: Microsoft’s AI research GitHub repo included an overly permissive Azure SAS token, exposing 38 TB of private data (workstation backups, 30,000+ Teams messages). - - _Sources_: [Wiz Blog](https://www.wiz.io/blog/38-terabytes-of-private-data-accidentally-exposed-by-microsoft-ai-researchers), [TechCrunch](https://techcrunch.com/2023/09/18/microsoft-ai-researchers-accidentally-exposed-terabytes-of-internal-sensitive-data/) - -5. **GitHub** - - _Incident_: GitHub discovered its RSA SSH host private key was briefly exposed in a public repository and rotated it out of caution. - - _Sources_: [GitHub Blog](https://github.blog/news-insights/company-news/we-updated-our-rsa-ssh-host-key/) - -Left unchecked, leaked secrets can lead to unauthorized access, pivoting within your environment, regulatory fines, and brand‑damaging incident response costs. +Leaked secrets fuel unauthorized access, lateral movement, regulatory fines, and brand-damaging incident-response costs. 
# Benchmark Results diff --git a/docker/Dockerfile b/docker/Dockerfile index 62da0a8..ea87c51 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -16,13 +16,25 @@ RUN set -eux; \ *) echo "unsupported arch ${TARGETARCH}" >&2; exit 1 ;; \ esac; \ # download & unpack - LATEST_URL=$(curl -s https://api.github.com/repos/mongodb/kingfisher/releases/latest \ + LATEST_URL=$(curl -fsSL https://api.github.com/repos/mongodb/kingfisher/releases/latest \ | grep -Eo "https://[^\"]*${SUFFIX}"); \ - curl -L "$LATEST_URL" -o kingfisher.tgz; \ + if [ -z "$LATEST_URL" ]; then \ + echo "Failed to fetch the latest release URL for ${SUFFIX}" >&2; \ + exit 1; \ + fi; \ + curl -fsSL "$LATEST_URL" -o kingfisher.tgz; \ + # verify the archive against the published checksums before unpacking it + CHECKSUM_URL=$(curl -fsSL https://api.github.com/repos/mongodb/kingfisher/releases/latest \ + | grep -Eo "https://[^\"]*checksums.txt"); \ + curl -fsSL "$CHECKSUM_URL" -o checksums.txt; \ + EXPECTED_CHECKSUM=$(grep "${SUFFIX}" checksums.txt | awk '{print $1}'); \ + echo "$EXPECTED_CHECKSUM kingfisher.tgz" | sha256sum -c -; \ + rm checksums.txt; \ tar -xzf kingfisher.tgz; \ rm kingfisher.tgz; \ # locate the binary (pattern covers kingfisher-linux-x64 / kingfisher-linux-arm64) - KF_PATH=$(find . 
-type f -name 'kingfisher*' -executable -print -quit); \ + if [ -z "$KF_PATH" ]; then echo "No executable kingfisher binary found" >&2; exit 1; fi; \ install -m 0755 "$KF_PATH" /usr/local/bin/kingfisher; \ # optional cleanup to keep the image small rm -rf /app/* diff --git a/src/parser.rs b/src/parser.rs index 4625b6c..255335a 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -98,7 +98,7 @@ impl Language { Language::Rust => Ok(tree_sitter_rust::LANGUAGE.into()), Language::Toml => Ok(tree_sitter_toml_ng::LANGUAGE.into()), Language::TypeScript => Ok(tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into()), - Language::Yaml => Ok(tree_sitter_yaml::language()), + Language::Yaml => Ok(tree_sitter_yaml::LANGUAGE.into()), } } }