forked from mirrors/kingfisher
updated README
This commit is contained in:
parent
572d8146e7
commit
b06baeb7bd
6 changed files with 48 additions and 57 deletions
|
|
@ -4,6 +4,7 @@ All notable changes to this project will be documented in this file.
|
|||
|
||||
## [1.22.0]
|
||||
- Added rules for Google Gemini AI, Cohere, Stability.ai, Replicate, Runway, Clarifai
|
||||
- Upgraded dependencies
|
||||
|
||||
## [1.21.0]
|
||||
- Improved Azure Storage rule
|
||||
|
|
|
|||
16
Cargo.toml
16
Cargo.toml
|
|
@ -30,7 +30,7 @@ clap = { version = "4.5", features = [
|
|||
anyhow = "1.0"
|
||||
bstr = { version = "1.12", features = ["serde"] }
|
||||
fixedbitset = "0.5"
|
||||
gix = { version = "0.72", features = ["max-performance", "serde", "blocking-network-client"] }
|
||||
gix = { version = "0.73", features = ["max-performance", "serde", "blocking-network-client"] }
|
||||
ignore = "0.4"
|
||||
petgraph = "0.6"
|
||||
roaring = "0.10"
|
||||
|
|
@ -102,9 +102,9 @@ octorust = "0.9.0"
|
|||
reqwest-middleware = "0.4.2"
|
||||
tracing-subscriber = {version = "0.3.19", features = ["env-filter"] }
|
||||
tracing-core = "0.1.34"
|
||||
tree-sitter = "0.24.7"
|
||||
tree-sitter-bash = "0.23.3"
|
||||
tree-sitter-c = "0.23.4"
|
||||
tree-sitter = "0.25.8"
|
||||
tree-sitter-bash = "0.25.0"
|
||||
tree-sitter-c = "0.24.1"
|
||||
tree-sitter-c-sharp = "0.23.1"
|
||||
tree-sitter-cpp = "0.23.4"
|
||||
tree-sitter-css = "0.23.2"
|
||||
|
|
@ -115,10 +115,10 @@ tree-sitter-javascript = "0.23.1"
|
|||
tree-sitter-php = "0.23.11"
|
||||
tree-sitter-python = "0.23.6"
|
||||
tree-sitter-ruby = "0.23.1"
|
||||
tree-sitter-rust = "0.23.3"
|
||||
tree-sitter-rust = "0.24.0"
|
||||
tree-sitter-toml-ng = "0.7.0"
|
||||
tree-sitter-typescript = "0.23.2"
|
||||
tree-sitter-yaml = "0.6.1"
|
||||
tree-sitter-yaml = "0.7.1"
|
||||
streaming-iterator = "0.1.9"
|
||||
tree-sitter-regex = "0.24.3"
|
||||
content_inspector = "0.2.4"
|
||||
|
|
@ -140,7 +140,7 @@ sha2 = "0.10.9"
|
|||
strum_macros = "0.27.1"
|
||||
humantime = "2.2.0"
|
||||
path-dedot = "3.1.1"
|
||||
quick-xml = {version = "0.37.5", features = ["serde","serialize"] }
|
||||
quick-xml = {version = "0.38.0", features = ["serde","serialize"] }
|
||||
rustls = "0.23.29"
|
||||
tokio-postgres-rustls = "0.13.0"
|
||||
rustls-native-certs = "0.8.1"
|
||||
|
|
@ -148,7 +148,7 @@ predicates = "3.1.3"
|
|||
assert_cmd = "2.0.17"
|
||||
proptest = "1.7.0"
|
||||
color-backtrace = "0.7.0"
|
||||
gitlab = "0.1711.0"
|
||||
gitlab = "0.1801.0"
|
||||
mimalloc = {version = "0.1.47", features = ["override"]}
|
||||
thread_local = "1.1.9"
|
||||
crc32fast = "1.5.0"
|
||||
|
|
|
|||
4
Makefile
4
Makefile
|
|
@ -368,10 +368,10 @@ all: linux darwin
|
|||
|
||||
dockerfile:
|
||||
# Build for the host architecture (default)
|
||||
docker build -t kingfisher:latest .
|
||||
docker build -f docker/Dockerfile -t kingfisher:latest .
|
||||
|
||||
# Cross‑build for arm64 from an x64 machine
|
||||
docker buildx build --platform linux/arm64 -t kingfisher:arm64 .
|
||||
docker buildx build -f docker/Dockerfile --platform linux/arm64 -t kingfisher:arm64 .
|
||||
|
||||
list-archives:
|
||||
@echo -e "\n=== Built archives ==="
|
||||
|
|
|
|||
64
README.md
64
README.md
|
|
@ -58,27 +58,23 @@ make darwin-all # builds both x64 and arm64
|
|||
make all # builds for every OS and architecture supported
|
||||
```
|
||||
|
||||
### Run Kingfisher in Docker
|
||||
# 🔐 Detection Rules at a Glance
|
||||
|
||||
Kingfisher ships with hundreds of rules that cover everything from classic cloud keys to the latest LLM-API secrets.
|
||||
Below is an overview; click any category to see the exact rule IDs.
|
||||
|
||||
Run the dockerized Kingfisher container:
|
||||
```bash
|
||||
# GitHub Container Registry
|
||||
docker run --rm ghcr.io/mongodb/kingfisher:latest --version
|
||||
| Category | What we catch |
|
||||
|----------|---------------|
|
||||
| **AI / LLM APIs** | OpenAI, Anthropic, Google Gemini, Cohere, Mistral, Stability AI, Replicate, xAI (Grok), and more
|
||||
| **Cloud Providers** | AWS, Azure, GCP, Alibaba Cloud, DigitalOcean, IBM Cloud, Cloudflare, and more
|
||||
| **Dev & CI/CD** | GitHub/GitLab tokens, CircleCI, TravisCI, TeamCity, Docker Hub, npm & PyPI publish tokens, and more
|
||||
| **Messaging & Comms** | Slack, Discord, Microsoft Teams, Twilio, Mailgun/SendGrid/Mailchimp, and more
|
||||
| **Databases & Data Ops** | MongoDB Atlas, PlanetScale, Postgres DSNs, Grafana Cloud, Datadog, Dynatrace, and more
|
||||
| **Payments & Billing** | Stripe, PayPal, Square, GoCardless, and more
|
||||
| **Security & DevSecOps** | Snyk, Dependency-Track, CodeClimate, Codacy, OpsGenie, PagerDuty, and more
|
||||
| **Misc. SaaS & Tools** | 1Password, Adobe, Atlassian/Jira, Asana, Netlify, Baremetrics, and more
|
||||
|
||||
# Scan the current working directory
|
||||
# (mounts your code at /src and scans it)
|
||||
docker run --rm -v "$PWD":/src ghcr.io/mongodb/kingfisher:latest scan /src
|
||||
|
||||
# Scan while providing a GitHub token
|
||||
docker run --rm -e KF_GITHUB_TOKEN=ghp_… -v "$PWD":/proj ghcr.io/mongodb/kingfisher:latest scan --git-url https://github.com/org/private_repo.git
|
||||
|
||||
# Scan and output as json
|
||||
docker run --rm -v "$PWD":/proj ghcr.io/mongodb/kingfisher:latest scan /proj --format json --output /proj/findings.json
|
||||
```
|
||||
|
||||
|
||||
# Write Custom Rules!
|
||||
## Write Custom Rules!
|
||||
|
||||
Kingfisher ships with hundreds of rules with HTTP and service‑specific validation checks (AWS, Azure, GCP, etc.) to confirm if a detected string is a live credential.
|
||||
|
||||
|
|
@ -388,33 +384,15 @@ By integrating Kingfisher into your development lifecycle, you can:
|
|||
|
||||
## The Risk of Leaked Secrets
|
||||
|
||||
Embedding credentials in code repositories is a pervasive, ever‑present risk that leads directly to data breaches:
|
||||
Real breaches show how one exposed key can snowball into a full-scale incident:
|
||||
|
||||
1. **Uber (2016)**
|
||||
- **Uber (2016):** GitHub-hosted AWS key let attackers access data on 57 M riders and 600 k drivers. [[BBC](https://www.bbc.com/news/technology-42075306)] [[Ars](https://arstechnica.com/tech-policy/2017/11/report-uber-paid-hackers-100000-to-keep-2016-data-breach-quiet/)]
|
||||
- **AWS engineer (2020):** Pushed log files with root credentials to GitHub. [[Register](https://www.theregister.com/2020/01/23/aws_engineer_credentials_github/)] [[UpGuard](https://www.upguard.com/breaches/identity-and-access-misstep-how-an-amazon-engineer-exposed-credentials-and-more)]
|
||||
- **Infosys (2023):** Full-admin AWS key left in a public PyPI package for a year. [[Stack](https://www.thestack.technology/infosys-leak-aws-key-exposed-on-pypi/)] [[Blog](https://tomforb.es/blog/infosys-leak/)]
|
||||
- **Microsoft (2023):** Azure SAS token in an AI repo exposed 38 TB of internal data. [[Wiz](https://www.wiz.io/blog/38-terabytes-of-private-data-accidentally-exposed-by-microsoft-ai-researchers)] [[TechCrunch](https://techcrunch.com/2023/09/18/microsoft-ai-researchers-accidentally-exposed-terabytes-of-internal-sensitive-data/)]
|
||||
- **GitHub (2023):** RSA SSH host key briefly went public; company rotated it. [[GitHub](https://github.blog/news-insights/company-news/we-updated-our-rsa-ssh-host-key/)]
|
||||
|
||||
- _Incident_: Attackers stole GitHub credentials, retrieved an AWS key from a developer’s private repo, and accessed data on 57 million riders and 600 000 drivers.
|
||||
- _Sources_: [BBC News](https://www.bbc.com/news/technology-42075306), [Ars Technica](https://arstechnica.com/tech-policy/2017/11/report-uber-paid-hackers-100000-to-keep-2016-data-breach-quiet/)
|
||||
|
||||
2. **AWS**
|
||||
|
||||
- _Incident_: An AWS engineer accidentally published log files and CloudFormation templates containing AWS key pairs (including “rootkey.csv”) to a public GitHub repo.
|
||||
- _Sources_: [The Register](https://www.theregister.com/2020/01/23/aws_engineer_credentials_github/), [UpGuard](https://www.upguard.com/breaches/identity-and-access-misstep-how-an-amazon-engineer-exposed-credentials-and-more)
|
||||
|
||||
3. **Infosys**
|
||||
|
||||
- _Incident_: Infosys published an internal PyPI package embedding a FullAdminAccess AWS key for a Johns Hopkins data bucket; the key remained active for over a year.
|
||||
- _Sources_: [The Stack](https://www.thestack.technology/infosys-leak-aws-key-exposed-on-pypi/), [Tom Forbes Blog](https://tomforb.es/blog/infosys-leak/)
|
||||
|
||||
4. **Microsoft**
|
||||
|
||||
- _Incident_: Microsoft’s AI research GitHub repo included an overly permissive Azure SAS token, exposing 38 TB of private data (workstation backups, 30,000+ Teams messages).
|
||||
- _Sources_: [Wiz Blog](https://www.wiz.io/blog/38-terabytes-of-private-data-accidentally-exposed-by-microsoft-ai-researchers), [TechCrunch](https://techcrunch.com/2023/09/18/microsoft-ai-researchers-accidentally-exposed-terabytes-of-internal-sensitive-data/)
|
||||
|
||||
5. **GitHub**
|
||||
- _Incident_: GitHub discovered its RSA SSH host private key was briefly exposed in a public repository and rotated it out of caution.
|
||||
- _Sources_: [GitHub Blog](https://github.blog/news-insights/company-news/we-updated-our-rsa-ssh-host-key/)
|
||||
|
||||
Left unchecked, leaked secrets can lead to unauthorized access, pivoting within your environment, regulatory fines, and brand‑damaging incident response costs.
|
||||
Leaked secrets fuel unauthorized access, lateral movement, regulatory fines, and brand-damaging incident-response costs.
|
||||
|
||||
# Benchmark Results
|
||||
|
||||
|
|
|
|||
|
|
@ -16,13 +16,25 @@ RUN set -eux; \
|
|||
*) echo "unsupported arch ${TARGETARCH}" >&2; exit 1 ;; \
|
||||
esac; \
|
||||
# download & unpack
|
||||
LATEST_URL=$(curl -s https://api.github.com/repos/mongodb/kingfisher/releases/latest \
|
||||
LATEST_URL=$(curl -fsSL https://api.github.com/repos/mongodb/kingfisher/releases/latest \
|
||||
| grep -Eo "https://[^\"]*${SUFFIX}"); \
|
||||
curl -L "$LATEST_URL" -o kingfisher.tgz; \
|
||||
if [ -z "$LATEST_URL" ]; then \
|
||||
echo "Failed to fetch the latest release URL for ${SUFFIX}" >&2; \
|
||||
exit 1; \
|
||||
fi; \
|
||||
curl -fsSL "$LATEST_URL" -o kingfisher.tgz; \
|
||||
CHECKSUM_URL=$(curl -fsSL https://api.github.com/repos/mongodb/kingfisher/releases/latest \
|
||||
| grep -Eo "https://[^\"]*checksums.txt"); \
|
||||
curl -fsSL "$CHECKSUM_URL" -o checksums.txt; \
|
||||
EXPECTED_CHECKSUM=$(grep "${SUFFIX}" checksums.txt | awk '{print $1}'); \
|
||||
echo "$EXPECTED_CHECKSUM kingfisher.tgz" | sha256sum -c -; \
|
||||
tar -xzf kingfisher.tgz; \
|
||||
rm kingfisher.tgz checksums.txt; \
|
||||
tar -xzf kingfisher.tgz; \
|
||||
rm kingfisher.tgz; \
|
||||
# locate the binary (pattern covers kingfisher-linux-x64 / kingfisher-linux-arm64)
|
||||
KF_PATH=$(find . -type f -name 'kingfisher*' | head -n1); \
|
||||
KF_PATH=$(find . -type f -name 'kingfisher*' -executable -print -quit); \
|
||||
if [ -z "$KF_PATH" ]; then echo "No executable kingfisher binary found" >&2; exit 1; fi; \
|
||||
install -m 0755 "$KF_PATH" /usr/local/bin/kingfisher; \
|
||||
# optional cleanup to keep the image small
|
||||
rm -rf /app/*
|
||||
|
|
|
|||
|
|
@ -98,7 +98,7 @@ impl Language {
|
|||
Language::Rust => Ok(tree_sitter_rust::LANGUAGE.into()),
|
||||
Language::Toml => Ok(tree_sitter_toml_ng::LANGUAGE.into()),
|
||||
Language::TypeScript => Ok(tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into()),
|
||||
Language::Yaml => Ok(tree_sitter_yaml::language()),
|
||||
Language::Yaml => Ok(tree_sitter_yaml::LANGUAGE.into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue