From 76be1df60c0f6f9a879070a5987584a6a510b08c Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Wed, 28 Jan 2026 10:27:24 -0800 Subject: [PATCH 01/19] Refactored into multiple crates. Added the 'validate' subcommand --- Cargo.toml | 14 + README.md | 113 +- crates/kingfisher-core/Cargo.toml | 48 + crates/kingfisher-core/src/blob.rs | 495 +++++++ crates/kingfisher-core/src/bstring_escape.rs | 128 ++ crates/kingfisher-core/src/entropy.rs | 89 ++ crates/kingfisher-core/src/error.rs | 36 + .../src/git_commit_metadata.rs | 159 +++ crates/kingfisher-core/src/lib.rs | 27 + crates/kingfisher-core/src/location.rs | 296 +++++ crates/kingfisher-core/src/origin.rs | 310 +++++ crates/kingfisher-rules/Cargo.toml | 64 + .../data}/rules/adafruitio.yml | 0 .../kingfisher-rules/data}/rules/adobe.yml | 0 .../kingfisher-rules/data}/rules/age.yml | 0 .../kingfisher-rules/data}/rules/ai21.yml | 0 .../kingfisher-rules/data}/rules/airbrake.yml | 0 .../kingfisher-rules/data}/rules/airtable.yml | 0 .../kingfisher-rules/data}/rules/aiven.yml | 0 .../kingfisher-rules/data}/rules/alchemy.yml | 0 .../kingfisher-rules/data}/rules/algolia.yml | 0 .../kingfisher-rules/data}/rules/alibaba.yml | 0 .../data}/rules/anthropic.yml | 0 .../kingfisher-rules/data}/rules/anypoint.yml | 0 .../kingfisher-rules/data}/rules/apify.yml | 0 .../kingfisher-rules/data}/rules/apollo.yml | 0 .../data}/rules/artifactory.yml | 0 .../kingfisher-rules/data}/rules/asana.yml | 0 .../data}/rules/assemblyai.yml | 0 .../data}/rules/atlassian.yml | 0 .../kingfisher-rules/data}/rules/auth0.yml | 0 .../kingfisher-rules/data}/rules/authress.yml | 0 .../kingfisher-rules/data}/rules/aws.yml | 0 .../kingfisher-rules/data}/rules/azure.yml | 0 .../data}/rules/azuredevops.yml | 0 .../data}/rules/azureopenai.yml | 0 .../data}/rules/azuresearchquery.yml | 0 .../data}/rules/azurestorage.yml | 0 .../data}/rules/baremetrics.yml | 0 .../kingfisher-rules/data}/rules/baseten.yml | 0 .../kingfisher-rules/data}/rules/beamer.yml | 0 
.../data}/rules/bitbucket.yml | 0 .../kingfisher-rules/data}/rules/bitly.yml | 0 .../kingfisher-rules/data}/rules/blynk.yml | 0 .../data}/rules/buildkite.yml | 0 .../kingfisher-rules/data}/rules/cerebras.yml | 0 .../kingfisher-rules/data}/rules/circleci.yml | 0 .../data}/rules/ciscomeraki.yml | 0 .../kingfisher-rules/data}/rules/clarifai.yml | 0 .../kingfisher-rules/data}/rules/clay.yml | 0 .../kingfisher-rules/data}/rules/clearbit.yml | 0 .../data}/rules/clickhouse.yml | 0 .../kingfisher-rules/data}/rules/clojars.yml | 0 .../data}/rules/cloudflare.yml | 0 .../data}/rules/cloudsight.yml | 0 .../kingfisher-rules/data}/rules/codacy.yml | 0 .../data}/rules/codeclimate.yml | 0 .../kingfisher-rules/data}/rules/codecov.yml | 0 .../data}/rules/coderabbit.yml | 0 .../kingfisher-rules/data}/rules/cohere.yml | 0 .../kingfisher-rules/data}/rules/coinbase.yml | 0 .../data}/rules/confluent.yml | 0 .../data}/rules/contentful.yml | 0 .../data}/rules/coveralls.yml | 0 .../kingfisher-rules/data}/rules/coze.yml | 0 .../data}/rules/crates.io.yml | 0 .../data}/rules/credentials.yml | 0 .../kingfisher-rules/data}/rules/curl.yml | 0 .../kingfisher-rules/data}/rules/cursor.yml | 0 .../data}/rules/customerio.yml | 0 .../data}/rules/databricks.yml | 0 .../kingfisher-rules/data}/rules/datadog.yml | 0 .../kingfisher-rules/data}/rules/datagov.yml | 0 .../kingfisher-rules/data}/rules/deepgram.yml | 0 .../kingfisher-rules/data}/rules/deepseek.yml | 0 .../data}/rules/definednetworking.yml | 0 .../data}/rules/dependency_track.yml | 0 .../kingfisher-rules/data}/rules/diffbot.yml | 0 .../data}/rules/digitalocean.yml | 0 .../kingfisher-rules/data}/rules/discord.yml | 0 .../kingfisher-rules/data}/rules/disqus.yml | 0 .../kingfisher-rules/data}/rules/django.yml | 0 .../kingfisher-rules/data}/rules/docker.yml | 0 .../data}/rules/dockerhub.yml | 0 .../kingfisher-rules/data}/rules/doppler.yml | 0 .../kingfisher-rules/data}/rules/droneci.yml | 0 .../kingfisher-rules/data}/rules/dropbox.yml | 0 
.../kingfisher-rules/data}/rules/duffel.yml | 0 .../data}/rules/dynatrace.yml | 0 .../kingfisher-rules/data}/rules/easypost.yml | 0 .../data}/rules/elevenlabs.yml | 0 .../data}/rules/endorlabs.yml | 0 .../kingfisher-rules/data}/rules/eraserio.yml | 0 .../data}/rules/eventbrite.yml | 0 .../kingfisher-rules/data}/rules/exaai.yml | 0 .../kingfisher-rules/data}/rules/facebook.yml | 0 .../kingfisher-rules/data}/rules/fastly.yml | 0 .../kingfisher-rules/data}/rules/figma.yml | 0 .../kingfisher-rules/data}/rules/fileio.yml | 0 .../data}/rules/filezilla.yml | 0 .../kingfisher-rules/data}/rules/finicity.yml | 0 .../kingfisher-rules/data}/rules/finnhub.yml | 0 .../data}/rules/firecrawl.yml | 0 .../data}/rules/fireworksai.yml | 0 .../data}/rules/fleetbase.yml | 0 .../kingfisher-rules/data}/rules/flickr.yml | 0 .../kingfisher-rules/data}/rules/flyio.yml | 0 .../data}/rules/foursquare.yml | 0 .../kingfisher-rules/data}/rules/frame.io.yml | 0 .../kingfisher-rules/data}/rules/frameio.yml | 0 .../data}/rules/freshbooks.yml | 0 .../data}/rules/freshdesk.yml | 0 .../kingfisher-rules/data}/rules/friendli.yml | 0 .../kingfisher-rules/data}/rules/gcp.yml | 0 .../kingfisher-rules/data}/rules/generic.yml | 0 .../kingfisher-rules/data}/rules/gitalk.yml | 0 .../kingfisher-rules/data}/rules/github.yml | 0 .../kingfisher-rules/data}/rules/gitlab.yml | 0 .../kingfisher-rules/data}/rules/gitter.yml | 0 .../data}/rules/gocardless.yml | 0 .../kingfisher-rules/data}/rules/google.yml | 0 .../data}/rules/googleoauth2.yml | 0 .../kingfisher-rules/data}/rules/gradle.yml | 0 .../kingfisher-rules/data}/rules/grafana.yml | 0 .../kingfisher-rules/data}/rules/groq.yml | 0 .../kingfisher-rules/data}/rules/guardian.yml | 0 .../kingfisher-rules/data}/rules/gumroad.yml | 0 .../kingfisher-rules/data}/rules/harness.yml | 0 .../kingfisher-rules/data}/rules/hashes.yml | 0 .../data}/rules/hashicorp.yml | 0 .../kingfisher-rules/data}/rules/hereapi.yml | 0 .../kingfisher-rules/data}/rules/heroku.yml | 0 
.../data}/rules/honeycomb.yml | 0 .../kingfisher-rules/data}/rules/http.yml | 0 .../kingfisher-rules/data}/rules/hubspot.yml | 0 .../data}/rules/huggingface.yml | 0 .../kingfisher-rules/data}/rules/ibm.yml | 0 .../kingfisher-rules/data}/rules/imagekit.yml | 0 .../data}/rules/infracost.yml | 0 .../kingfisher-rules/data}/rules/infura.yml | 0 .../data}/rules/instantly.yml | 0 .../kingfisher-rules/data}/rules/intercom.yml | 0 .../kingfisher-rules/data}/rules/intra42.yml | 0 .../kingfisher-rules/data}/rules/ionic.yml | 0 .../kingfisher-rules/data}/rules/ipstack.yml | 0 .../kingfisher-rules/data}/rules/jdbc.yml | 0 .../kingfisher-rules/data}/rules/jenkins.yml | 0 .../kingfisher-rules/data}/rules/jina.yml | 0 .../kingfisher-rules/data}/rules/jira.yml | 0 .../kingfisher-rules/data}/rules/jotform.yml | 0 .../data}/rules/jumpcloud.yml | 0 .../kingfisher-rules/data}/rules/jwt.yml | 0 .../kingfisher-rules/data}/rules/kagi.yml | 0 .../kingfisher-rules/data}/rules/kickbox.yml | 0 .../kingfisher-rules/data}/rules/klaviyo.yml | 0 .../kingfisher-rules/data}/rules/klingai.yml | 0 .../data}/rules/langchain.yml | 0 .../kingfisher-rules/data}/rules/lark.yml | 0 .../data}/rules/launchdarkly.yml | 0 .../kingfisher-rules/data}/rules/line.yml | 0 .../kingfisher-rules/data}/rules/linear.yml | 0 .../kingfisher-rules/data}/rules/linkedin.yml | 0 .../kingfisher-rules/data}/rules/lob.yml | 0 .../kingfisher-rules/data}/rules/looker.yml | 0 .../data}/rules/mailchimp.yml | 0 .../kingfisher-rules/data}/rules/mailgun.yml | 0 .../kingfisher-rules/data}/rules/mailjet.yml | 0 .../kingfisher-rules/data}/rules/mandrill.yml | 0 .../kingfisher-rules/data}/rules/mapbox.yml | 0 .../data}/rules/mattermost.yml | 0 .../kingfisher-rules/data}/rules/maxmind.yml | 0 .../kingfisher-rules/data}/rules/mergify.yml | 0 .../data}/rules/messagebird.yml | 0 .../data}/rules/microsoft_teams.yml | 0 .../data}/rules/microsoftteamswebhook.yml | 0 .../kingfisher-rules/data}/rules/mistral.yml | 0 
.../kingfisher-rules/data}/rules/monday.yml | 0 .../kingfisher-rules/data}/rules/mongodb.yml | 0 .../kingfisher-rules/data}/rules/mysql.yml | 0 .../kingfisher-rules/data}/rules/nasa.yml | 0 .../kingfisher-rules/data}/rules/netlify.yml | 0 .../kingfisher-rules/data}/rules/netrc.yml | 0 .../kingfisher-rules/data}/rules/newrelic.yml | 0 .../kingfisher-rules/data}/rules/ngrok.yml | 0 .../kingfisher-rules/data}/rules/notion.yml | 0 .../kingfisher-rules/data}/rules/npm.yml | 0 .../kingfisher-rules/data}/rules/nuget.yml | 0 .../kingfisher-rules/data}/rules/nvidia.yml | 0 .../kingfisher-rules/data}/rules/nylas.yml | 0 .../kingfisher-rules/data}/rules/nytimes.yml | 0 .../kingfisher-rules/data}/rules/odbc.yml | 0 .../kingfisher-rules/data}/rules/okta.yml | 0 .../kingfisher-rules/data}/rules/ollama.yml | 0 .../data}/rules/onepassword.yml | 0 .../kingfisher-rules/data}/rules/openai.yml | 0 .../data}/rules/openrouter.yml | 0 .../data}/rules/openweathermap.yml | 0 .../kingfisher-rules/data}/rules/opsgenie.yml | 0 .../data}/rules/optimizely.yml | 0 .../kingfisher-rules/data}/rules/owlbot.yml | 0 .../data}/rules/packagecloud.yml | 0 .../data}/rules/pagerdutyapikey.yml | 0 .../data}/rules/particle.io.yml | 0 .../kingfisher-rules/data}/rules/pastebin.yml | 0 .../kingfisher-rules/data}/rules/paypal.yml | 0 .../kingfisher-rules/data}/rules/paystack.yml | 0 .../kingfisher-rules/data}/rules/pdflayer.yml | 0 .../kingfisher-rules/data}/rules/pem.yml | 0 .../data}/rules/perplexity.yml | 0 .../data}/rules/phpmailer.yml | 0 .../kingfisher-rules/data}/rules/plaid.yml | 0 .../data}/rules/planetscale.yml | 0 .../kingfisher-rules/data}/rules/postgres.yml | 0 .../kingfisher-rules/data}/rules/posthog.yml | 0 .../kingfisher-rules/data}/rules/postman.yml | 0 .../kingfisher-rules/data}/rules/postmark.yml | 0 .../kingfisher-rules/data}/rules/prefect.yml | 0 .../kingfisher-rules/data}/rules/privkey.yml | 0 .../kingfisher-rules/data}/rules/psexec.yml | 0 .../kingfisher-rules/data}/rules/pubnub.yml | 0 
.../kingfisher-rules/data}/rules/pulumi.yml | 0 .../kingfisher-rules/data}/rules/pypi.yml | 0 .../kingfisher-rules/data}/rules/rabbitmq.yml | 0 .../kingfisher-rules/data}/rules/rapidapi.yml | 0 .../kingfisher-rules/data}/rules/react.yml | 0 .../kingfisher-rules/data}/rules/readme.yml | 0 .../data}/rules/recaptcha.yml | 0 .../data}/rules/replicate.yml | 0 .../kingfisher-rules/data}/rules/resend.yml | 0 .../kingfisher-rules/data}/rules/retellai.yml | 0 .../kingfisher-rules/data}/rules/riot.yml | 0 .../kingfisher-rules/data}/rules/rubygems.yml | 0 .../kingfisher-rules/data}/rules/runway.yml | 0 .../data}/rules/salesforce.yml | 0 .../kingfisher-rules/data}/rules/sauce.yml | 0 .../kingfisher-rules/data}/rules/scale.yml | 0 .../kingfisher-rules/data}/rules/scalingo.yml | 0 .../data}/rules/scraperapi.yml | 0 .../kingfisher-rules/data}/rules/segment.yml | 0 .../kingfisher-rules/data}/rules/sendbird.yml | 0 .../kingfisher-rules/data}/rules/sendgrid.yml | 0 .../data}/rules/sendinblue.yml | 0 .../kingfisher-rules/data}/rules/sentry.yml | 0 .../kingfisher-rules/data}/rules/shippo.yml | 0 .../kingfisher-rules/data}/rules/shodan.yml | 0 .../kingfisher-rules/data}/rules/shopify.yml | 0 .../kingfisher-rules/data}/rules/slack.yml | 0 .../kingfisher-rules/data}/rules/snyk.yml | 0 .../data}/rules/sonarcloud.yml | 0 .../data}/rules/sonarqube.yml | 0 .../data}/rules/sourcegraph.yml | 0 .../kingfisher-rules/data}/rules/square.yml | 0 .../kingfisher-rules/data}/rules/sslmate.yml | 0 .../data}/rules/stabilityai.yml | 0 .../data}/rules/stackhawk.yml | 0 .../data}/rules/statuspage.yml | 0 .../kingfisher-rules/data}/rules/stripe.yml | 0 .../kingfisher-rules/data}/rules/supabase.yml | 0 .../data}/rules/tailscale.yml | 0 .../kingfisher-rules/data}/rules/tavily.yml | 0 .../kingfisher-rules/data}/rules/teamcity.yml | 0 .../kingfisher-rules/data}/rules/telegram.yml | 0 .../data}/rules/thingsboard.yml | 0 .../data}/rules/togetherai.yml | 0 .../kingfisher-rules/data}/rules/travisci.yml | 0 
.../kingfisher-rules/data}/rules/truenas.yml | 0 .../kingfisher-rules/data}/rules/twilio.yml | 0 .../kingfisher-rules/data}/rules/twitch.yml | 0 .../kingfisher-rules/data}/rules/twitter.yml | 0 .../kingfisher-rules/data}/rules/typeform.yml | 0 .../kingfisher-rules/data}/rules/uri.yml | 0 .../kingfisher-rules/data}/rules/vastai.yml | 0 .../kingfisher-rules/data}/rules/vercel.yml | 0 .../kingfisher-rules/data}/rules/vmware.yml | 0 .../kingfisher-rules/data}/rules/voyageai.yml | 0 .../data}/rules/weightsandbiases.yml | 0 .../data}/rules/wireguard.yml | 0 .../kingfisher-rules/data}/rules/xAI.yml | 0 .../kingfisher-rules/data}/rules/yandex.yml | 0 .../kingfisher-rules/data}/rules/yelp.yml | 0 .../kingfisher-rules/data}/rules/youtube.yml | 0 .../kingfisher-rules/data}/rules/zhipu.yml | 0 .../kingfisher-rules/data}/rules/zohocrm.yml | 0 .../kingfisher-rules/data}/rules/zuplo.yml | 0 crates/kingfisher-rules/src/defaults.rs | 57 + crates/kingfisher-rules/src/lib.rs | 35 + crates/kingfisher-rules/src/liquid_filters.rs | 1159 +++++++++++++++++ .../kingfisher-rules/src}/rule.rs | 0 crates/kingfisher-rules/src/rules.rs | 245 ++++ crates/kingfisher-rules/src/rules_database.rs | 252 ++++ crates/kingfisher-scanner/Cargo.toml | 120 ++ crates/kingfisher-scanner/src/finding.rs | 278 ++++ crates/kingfisher-scanner/src/lib.rs | 67 + crates/kingfisher-scanner/src/scanner.rs | 595 +++++++++ crates/kingfisher-scanner/src/scanner_pool.rs | 50 + .../kingfisher-scanner/src/validation/aws.rs | 358 +++++ .../src/validation/http_validation.rs | 455 +++++++ .../kingfisher-scanner/src/validation/mod.rs | 131 ++ .../src/validation/utils.rs | 159 +++ .../src/validation/validation_body.rs | 80 ++ docs/LIBRARY.md | 642 +++++++++ src/blob.rs | 422 +----- src/bstring_escape.rs | 140 +- src/cli/commands/mod.rs | 1 + src/cli/commands/validate.rs | 55 + src/cli/global.rs | 6 +- src/defaults.rs | 38 +- src/direct_validate.rs | 850 ++++++++++++ src/entropy.rs | 54 +- src/git_commit_metadata.rs | 142 +- 
src/lib.rs | 6 + src/liquid_filters.rs | 1159 +---------------- src/location.rs | 257 +--- src/main.rs | 18 +- src/matcher.rs | 30 +- src/origin.rs | 308 +---- src/rule_loader.rs | 6 + src/rules.rs | 255 +--- src/rules/lib.rs | 33 - src/rules/util.rs | 15 - src/rules_database.rs | 243 +--- src/scanner/enumerate.rs | 2 +- src/scanner/repos.rs | 4 +- src/validation.rs | 18 +- 324 files changed, 7483 insertions(+), 3041 deletions(-) create mode 100644 crates/kingfisher-core/Cargo.toml create mode 100644 crates/kingfisher-core/src/blob.rs create mode 100644 crates/kingfisher-core/src/bstring_escape.rs create mode 100644 crates/kingfisher-core/src/entropy.rs create mode 100644 crates/kingfisher-core/src/error.rs create mode 100644 crates/kingfisher-core/src/git_commit_metadata.rs create mode 100644 crates/kingfisher-core/src/lib.rs create mode 100644 crates/kingfisher-core/src/location.rs create mode 100644 crates/kingfisher-core/src/origin.rs create mode 100644 crates/kingfisher-rules/Cargo.toml rename {data => crates/kingfisher-rules/data}/rules/adafruitio.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/adobe.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/age.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/ai21.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/airbrake.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/airtable.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/aiven.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/alchemy.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/algolia.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/alibaba.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/anthropic.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/anypoint.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/apify.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/apollo.yml (100%) 
rename {data => crates/kingfisher-rules/data}/rules/artifactory.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/asana.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/assemblyai.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/atlassian.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/auth0.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/authress.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/aws.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/azure.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/azuredevops.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/azureopenai.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/azuresearchquery.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/azurestorage.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/baremetrics.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/baseten.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/beamer.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/bitbucket.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/bitly.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/blynk.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/buildkite.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/cerebras.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/circleci.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/ciscomeraki.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/clarifai.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/clay.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/clearbit.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/clickhouse.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/clojars.yml (100%) rename {data => 
crates/kingfisher-rules/data}/rules/cloudflare.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/cloudsight.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/codacy.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/codeclimate.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/codecov.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/coderabbit.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/cohere.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/coinbase.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/confluent.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/contentful.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/coveralls.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/coze.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/crates.io.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/credentials.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/curl.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/cursor.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/customerio.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/databricks.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/datadog.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/datagov.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/deepgram.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/deepseek.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/definednetworking.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/dependency_track.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/diffbot.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/digitalocean.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/discord.yml (100%) rename {data => 
crates/kingfisher-rules/data}/rules/disqus.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/django.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/docker.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/dockerhub.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/doppler.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/droneci.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/dropbox.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/duffel.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/dynatrace.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/easypost.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/elevenlabs.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/endorlabs.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/eraserio.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/eventbrite.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/exaai.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/facebook.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/fastly.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/figma.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/fileio.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/filezilla.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/finicity.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/finnhub.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/firecrawl.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/fireworksai.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/fleetbase.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/flickr.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/flyio.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/foursquare.yml (100%) rename 
{data => crates/kingfisher-rules/data}/rules/frame.io.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/frameio.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/freshbooks.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/freshdesk.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/friendli.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/gcp.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/generic.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/gitalk.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/github.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/gitlab.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/gitter.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/gocardless.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/google.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/googleoauth2.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/gradle.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/grafana.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/groq.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/guardian.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/gumroad.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/harness.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/hashes.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/hashicorp.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/hereapi.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/heroku.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/honeycomb.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/http.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/hubspot.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/huggingface.yml (100%) rename {data 
=> crates/kingfisher-rules/data}/rules/ibm.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/imagekit.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/infracost.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/infura.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/instantly.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/intercom.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/intra42.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/ionic.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/ipstack.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/jdbc.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/jenkins.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/jina.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/jira.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/jotform.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/jumpcloud.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/jwt.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/kagi.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/kickbox.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/klaviyo.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/klingai.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/langchain.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/lark.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/launchdarkly.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/line.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/linear.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/linkedin.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/lob.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/looker.yml (100%) rename {data => 
crates/kingfisher-rules/data}/rules/mailchimp.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/mailgun.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/mailjet.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/mandrill.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/mapbox.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/mattermost.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/maxmind.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/mergify.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/messagebird.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/microsoft_teams.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/microsoftteamswebhook.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/mistral.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/monday.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/mongodb.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/mysql.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/nasa.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/netlify.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/netrc.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/newrelic.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/ngrok.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/notion.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/npm.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/nuget.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/nvidia.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/nylas.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/nytimes.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/odbc.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/okta.yml (100%) rename {data => 
crates/kingfisher-rules/data}/rules/ollama.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/onepassword.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/openai.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/openrouter.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/openweathermap.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/opsgenie.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/optimizely.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/owlbot.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/packagecloud.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/pagerdutyapikey.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/particle.io.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/pastebin.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/paypal.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/paystack.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/pdflayer.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/pem.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/perplexity.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/phpmailer.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/plaid.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/planetscale.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/postgres.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/posthog.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/postman.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/postmark.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/prefect.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/privkey.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/psexec.yml (100%) rename {data => 
crates/kingfisher-rules/data}/rules/pubnub.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/pulumi.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/pypi.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/rabbitmq.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/rapidapi.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/react.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/readme.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/recaptcha.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/replicate.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/resend.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/retellai.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/riot.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/rubygems.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/runway.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/salesforce.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/sauce.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/scale.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/scalingo.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/scraperapi.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/segment.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/sendbird.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/sendgrid.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/sendinblue.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/sentry.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/shippo.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/shodan.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/shopify.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/slack.yml (100%) rename {data => 
crates/kingfisher-rules/data}/rules/snyk.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/sonarcloud.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/sonarqube.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/sourcegraph.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/square.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/sslmate.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/stabilityai.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/stackhawk.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/statuspage.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/stripe.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/supabase.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/tailscale.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/tavily.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/teamcity.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/telegram.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/thingsboard.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/togetherai.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/travisci.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/truenas.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/twilio.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/twitch.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/twitter.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/typeform.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/uri.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/vastai.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/vercel.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/vmware.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/voyageai.yml (100%) rename 
{data => crates/kingfisher-rules/data}/rules/weightsandbiases.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/wireguard.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/xAI.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/yandex.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/yelp.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/youtube.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/zhipu.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/zohocrm.yml (100%) rename {data => crates/kingfisher-rules/data}/rules/zuplo.yml (100%) create mode 100644 crates/kingfisher-rules/src/defaults.rs create mode 100644 crates/kingfisher-rules/src/lib.rs create mode 100644 crates/kingfisher-rules/src/liquid_filters.rs rename {src/rules => crates/kingfisher-rules/src}/rule.rs (100%) create mode 100644 crates/kingfisher-rules/src/rules.rs create mode 100644 crates/kingfisher-rules/src/rules_database.rs create mode 100644 crates/kingfisher-scanner/Cargo.toml create mode 100644 crates/kingfisher-scanner/src/finding.rs create mode 100644 crates/kingfisher-scanner/src/lib.rs create mode 100644 crates/kingfisher-scanner/src/scanner.rs create mode 100644 crates/kingfisher-scanner/src/scanner_pool.rs create mode 100644 crates/kingfisher-scanner/src/validation/aws.rs create mode 100644 crates/kingfisher-scanner/src/validation/http_validation.rs create mode 100644 crates/kingfisher-scanner/src/validation/mod.rs create mode 100644 crates/kingfisher-scanner/src/validation/utils.rs create mode 100644 crates/kingfisher-scanner/src/validation/validation_body.rs create mode 100644 docs/LIBRARY.md create mode 100644 src/cli/commands/validate.rs create mode 100644 src/direct_validate.rs delete mode 100644 src/rules/lib.rs delete mode 100644 src/rules/util.rs diff --git a/Cargo.toml b/Cargo.toml index 896bfe5..f80052d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,3 +1,12 @@ +[workspace] +members = [ + 
".", + "crates/kingfisher-core", + "crates/kingfisher-rules", + "crates/kingfisher-scanner", +] +resolver = "2" + [workspace.package] edition = "2021" rust-version = "1.90" @@ -40,6 +49,11 @@ assets = [ ] [dependencies] +# Library crates +kingfisher-core = { path = "crates/kingfisher-core" } +kingfisher-rules = { path = "crates/kingfisher-rules" } +kingfisher-scanner = { path = "crates/kingfisher-scanner" } + clap = { version = "4.5", features = [ "cargo", "derive", diff --git a/README.md b/README.md index f92e38a..157c525 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,7 @@ Designed for offensive security engineers and blue-teamers alike, Kingfisher hel - **Baseline management**: generate and track baselines to suppress known secrets ([docs/BASELINE.md](/docs/BASELINE.md)) - **Checksum-aware detection**: verifies tokens with built-in checksums (e.g., GitHub, Confluent, Zuplo) — no API calls required - **Built-in Report Viewer**: Visualize and triage findings locally with `kingisher view ./report-file.json` +- **Library crates**: Embed Kingfisher's scanning engine in your own Rust applications ([docs/LIBRARY.md](docs/LIBRARY.md)) **Learn more:** [Introducing Kingfisher: Real‑Time Secret Detection and Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation) @@ -58,7 +59,7 @@ NOTE: Replay has been slowed down for demo ![alt text](docs/kingfisher-usage-01.gif) ## Report Viewer Demo -Explore Kingfisher’s built-in report viewer and its `--access-map`, which can show what the token (AWS, GCP, Azure, GitHub, and GitLab...more coming) can actually access : [Access map outputs and viewer](#access-map-outputs-and-viewer) +Explore Kingfisher’s built-in report viewer and its `--access-map`, which can show what the token (AWS, GCP, Azure, GitHub, GitLab, and Slack...more coming) can actually access : [Access map outputs and viewer](#access-map-outputs-and-viewer) Note: when you pass 
`--view-report`, Kingfisher starts a **localhost-only** web server on port `7890` and opens it in your default browser. You’ll see this near the end of the scan output, and **Kingfisher will keep running** until you stop it. @@ -117,6 +118,7 @@ kingfisher scan /path/to/scan --access-map --view-report - [Access map outputs and viewer](#access-map-outputs-and-viewer) - [View access-map reports locally](#view-access-map-reports-locally) - [Pipe any text directly into Kingfisher by passing `-`](#pipe-any-text-directly-into-kingfisher-by-passing--) + - [Direct secret validation with `kingfisher validate`](#direct-secret-validation-with-kingfisher-validate) - [Limit maximum file size scanned (`--max-file-size`)](#limit-maximum-file-size-scanned---max-file-size) - [Scan using a rule _family_ with one flag](#scan-using-a-rule-family-with-one-flag) - [Display rule performance statistics](#display-rule-performance-statistics) @@ -189,6 +191,7 @@ kingfisher scan /path/to/scan --access-map --view-report - [Rule Performance Profiling](#rule-performance-profiling) - [CLI Options](#cli-options) - [Lineage and Evolution](#lineage-and-evolution) +- [Library Usage](#library-usage) - [Roadmap](#roadmap) - [License](#license) @@ -593,7 +596,7 @@ kingfisher scan /path/to/repo --format sarif --output findings.sarif Finding a leaked credential is only the first step. The critical question isn’t just “Is this a secret?”—it’s “What can an attacker do with it?” -Kingfisher's `--access-map` feature transforms secret detection from a simple alert into a comprehensive threat assessment. Instead of leaving you with a cryptic API key, Kingfisher actively authenticates against your cloud provider (AWS, GCP, Azure Storage, Azure DevOps, GitHub, or GitLab) to map the full extent of the credential's power. +Kingfisher's `--access-map` feature transforms secret detection from a simple alert into a comprehensive threat assessment. 
Instead of leaving you with a cryptic API key, Kingfisher actively authenticates against your cloud provider (AWS, GCP, Azure Storage, Azure DevOps, GitHub, GitLab, or Slack) to map the full extent of the credential's power. * Instant Identity Resolution: Immediately identify who the key belongs to—whether it's a specific IAM user, an assumed role, or a service account. * Visualize the Blast Radius: See exactly which resources (S3 buckets, EC2 instances, projects, storage containers) are exposed and at risk. @@ -624,6 +627,77 @@ cat /path/to/file.py | kingfisher scan - ``` +### Direct secret validation with `kingfisher validate` + +When you already know a secret's type and have the raw value, use `kingfisher validate` to check if it's still active—without needing the surrounding context that detection rules require. + +This is useful for: +- Re-validating a known secret from a previous scan +- Checking if a credential is still active before rotation +- Validating secrets from external sources (password managers, ticketing systems, etc.) + +```bash +# Validate an OpsGenie API key (using rule prefix matching) +kingfisher validate --rule kingfisher.opsgenie "12345678-9abc-def0-1234-56789abcdef0" + +# Validate from stdin +echo "ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" | kingfisher validate --rule kingfisher.github - + +# JSON output for scripting +kingfisher validate --rule kingfisher.slack "xoxb-..." 
--format json + +# AWS credentials - use --arg to auto-assign additional values +kingfisher validate --rule kingfisher.aws --arg AKIAIOSFODNN7EXAMPLE \ + "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" + +# Or use --var if you know the variable name +kingfisher validate --rule kingfisher.aws.2 --var AKID=AKIAIOSFODNN7EXAMPLE \ + "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" + +# GCP service account (pass JSON as secret) +kingfisher validate --rule kingfisher.gcp "$(cat service-account.json)" + +# MongoDB connection string +kingfisher validate --rule kingfisher.mongodb.3 \ + "mongodb+srv://user:password@cluster.mongodb.net/db" + +# PostgreSQL connection +kingfisher validate --rule kingfisher.postgres \ + "postgres://admin:password@db.example.com:5432/mydb" + +# JWT token +kingfisher validate --rule kingfisher.jwt \ + "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9..." +``` + +**Supported validators:** HTTP, AWS, GCP, MongoDB, MySQL, Postgres, JDBC, JWT, Azure Storage, and Coinbase. + +**Exit codes:** Returns `0` if any matching rule validates the secret as valid, `1` if all are invalid or an error occurred. + +**Passing additional values (`--arg` and `--var`):** + +Some validators need more than just the secret. For example, AWS needs both an access key ID and the secret key (see the rule for `dependent_rule` section): + +- `--arg VALUE` — Auto-assigns values to template variables (in alphabetical order). Use when you don't know the exact variable name. +- `--var NAME=VALUE` — Explicitly sets a variable. Use when you know the exact name, or to override `--arg`. + +```bash +# --arg auto-assigns to AKID (the only non-TOKEN variable for AWS) +kingfisher validate --rule kingfisher.aws --arg AKIAEXAMPLE "secret_key" + +# --var for explicit assignment +kingfisher validate --rule kingfisher.aws --var AKID=AKIAEXAMPLE "secret_key" +``` + +**Rule prefix matching:** Use partial rule IDs like `kingfisher.opsgenie` instead of the full `kingfisher.opsgenie.1`. 
If the prefix matches multiple rules, **all matching rules with compatible variables are tried**: + +```bash +$ kingfisher validate --rule kingfisher.aws --arg AKIAEXAMPLE "secret_key" +Rule: AWS Secret Access Key (kingfisher.aws.2) +Result: ✓ VALID +Response: arn:aws:iam::123456789012:user/example +``` + ### Limit maximum file size scanned (`--max-file-size`) By default, Kingfisher skips files larger than **256 MB**. You can raise or lower this cap per run with `--max-file-size`, which takes a value in **megabytes**. @@ -1613,6 +1687,41 @@ Since then it has evolved far beyond that starting point, introducing live valid - **Unified workflow** with JSON/BSON/SARIF outputs - **Cross-platform builds** for Linux, macOS, and Windows +# Library Usage + +Kingfisher's scanning engine is available as a set of Rust library crates that can be embedded into other applications: + +| Crate | Description | +|-------|-------------| +| `kingfisher-core` | Core types: `Blob`, `BlobId`, `Location`, `Origin`, entropy calculation | +| `kingfisher-rules` | Rule definitions, YAML parsing, compiled rule database, 200+ builtin rules | +| `kingfisher-scanner` | High-level scanning API with `Scanner` and `Finding` types | + +**Quick example:** + +```rust +use std::sync::Arc; +use kingfisher_rules::{get_builtin_rules, RulesDatabase, Rule}; +use kingfisher_scanner::Scanner; + +// Load builtin rules and compile +let rules = get_builtin_rules(None)?; +let rule_vec: Vec<Rule> = rules.iter_rules() + .map(|syntax| Rule::new(syntax.clone())) + .collect(); +let rules_db = Arc::new(RulesDatabase::from_rules(rule_vec)?); + +// Create scanner and scan +let scanner = Scanner::new(rules_db); +let findings = scanner.scan_file("config.yml")?; + +for finding in findings { + println!("{}: {}", finding.rule_name, finding.secret); +} +``` + +For complete documentation, see **[docs/LIBRARY.md](docs/LIBRARY.md)**. 
+ # Roadmap - More rules diff --git a/crates/kingfisher-core/Cargo.toml b/crates/kingfisher-core/Cargo.toml new file mode 100644 index 0000000..dfd9513 --- /dev/null +++ b/crates/kingfisher-core/Cargo.toml @@ -0,0 +1,48 @@ +[package] +name = "kingfisher-core" +version = "0.1.0" +description = "Core types and traits for Kingfisher secret scanner" +edition.workspace = true +rust-version.workspace = true +license.workspace = true +authors.workspace = true +homepage.workspace = true +repository.workspace = true +publish.workspace = true + +[dependencies] +# Serialization +serde = { version = "1.0", features = ["derive", "rc"] } +serde_json = "1.0" +schemars = "0.8" + +# Error handling +anyhow = "1.0" +thiserror = "1.0" + +# Hashing and crypto +sha1 = "0.10" +hex = "0.4" + +# Memory management +memmap2 = "0.9" +once_cell = "1.21" +parking_lot = "0.12" + +# Collections +smallvec = { version = "1", features = ["const_generics", "const_new", "union"] } +rustc-hash = "2.1" +dashmap = "6.1" + +# Byte string handling +bstr = { version = "1.12", features = ["serde"] } +memchr = "2.7" + +# Git types (minimal, for ObjectId and Time) +gix = { version = "0.73", default-features = false, features = ["serde"] } + +# Console formatting +console = "0.15" + +[dev-dependencies] +pretty_assertions = "1.4" diff --git a/crates/kingfisher-core/src/blob.rs b/crates/kingfisher-core/src/blob.rs new file mode 100644 index 0000000..a25e60d --- /dev/null +++ b/crates/kingfisher-core/src/blob.rs @@ -0,0 +1,495 @@ +//! Blob representation for scannable content. +//! +//! A [`Blob`] represents content that can be scanned for secrets. It can be +//! created from: +//! - In-memory bytes ([`Blob::from_bytes`]) +//! - A file path ([`Blob::from_file`]) +//! - Borrowed data ([`Blob::from_borrowed`]) +//! +//! Large files are automatically memory-mapped for efficiency. 
+ +use std::{ + convert::TryInto, + fs::File, + io::{Read, Write}, + path::Path, + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, + }, +}; + +use bstr::{BString, ByteSlice}; +use gix::ObjectId; +use once_cell::sync::OnceCell; +use parking_lot::Mutex; +use rustc_hash::FxHashMap; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use sha1::{Digest, Sha1}; +use smallvec::SmallVec; + +use crate::error::Result; +use crate::git_commit_metadata::CommitMetadata; + +/// Threshold above which files are memory-mapped instead of read into memory. +const LARGE_FILE_THRESHOLD: u64 = 0; // Currently: always mmap + +/// Global counter for temporary blob IDs. +static NEXT_ID: AtomicU64 = AtomicU64::new(1); + +/// Tracks where a blob was seen in git history. +#[derive(Clone, Debug, serde::Serialize)] +pub struct BlobAppearance { + /// Metadata about the commit where this blob appeared. + pub commit_metadata: Arc, + + /// The path of the blob within the repository. + pub path: BString, +} + +impl BlobAppearance { + /// Returns the path as a `&Path`, if it's valid UTF-8. + #[inline] + pub fn path(&self) -> std::result::Result<&Path, bstr::Utf8Error> { + self.path.to_path() + } +} + +/// A set of [`BlobAppearance`] entries, optimized for the common case of a single appearance. +pub type BlobAppearanceSet = SmallVec<[BlobAppearance; 1]>; + +/// The underlying data storage for a [`Blob`]. +pub enum BlobData<'a> { + /// Small blobs stored as owned bytes. + Owned(Vec), + + /// Large blobs that are memory-mapped from disk. + Mapped(memmap2::Mmap), + + /// Borrowed bytes (e.g., from a git pack file). + Borrowed(&'a [u8]), +} + +impl<'a> AsRef<[u8]> for BlobData<'a> { + fn as_ref(&self) -> &[u8] { + match self { + BlobData::Owned(v) => v, + BlobData::Mapped(m) => m, + BlobData::Borrowed(slice) => slice, + } + } +} + +impl<'a> BlobData<'a> { + /// Returns the length of the blob data in bytes. 
+ #[inline] + pub fn len(&self) -> usize { + self.as_ref().len() + } + + /// Returns true if the blob data is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.as_ref().is_empty() + } +} + +/// A scannable blob of content. +/// +/// `Blob` is the primary type for representing content to be scanned. It lazily +/// computes a content-based ID (SHA-1) and supports multiple backing storage types. +/// +/// # Examples +/// +/// ``` +/// use kingfisher_core::Blob; +/// +/// // Create from bytes +/// let blob = Blob::from_bytes(b"my secret content".to_vec()); +/// assert_eq!(blob.len(), 17); +/// +/// // Create from file +/// // let blob = Blob::from_file("path/to/file.txt")?; +/// ``` +pub struct Blob<'a> { + /// Lazily computed content-based ID. + id: OnceCell, + /// The underlying data. + data: BlobData<'a>, + /// Temporary ID assigned at creation (for debugging/tracking). + temp_id: u64, +} + +impl Blob<'_> { + /// Create a new `Blob` by reading from a file. + /// + /// Large files are automatically memory-mapped for efficiency. + #[inline] + pub fn from_file>(path: P) -> Result { + let mut file = File::open(&path)?; + let file_size = file.metadata()?.len(); + let temp_id = NEXT_ID.fetch_add(1, Ordering::Relaxed); + + if file_size > LARGE_FILE_THRESHOLD { + // Large files: one mmap, zero extra copies. + let mmap = unsafe { memmap2::Mmap::map(&file)? }; + Ok(Blob { id: OnceCell::new(), data: BlobData::Mapped(mmap), temp_id }) + } else { + // Small files: read into memory. + let mut bytes = Vec::with_capacity(file_size as usize); + file.read_to_end(&mut bytes)?; + Ok(Blob { id: OnceCell::new(), data: BlobData::Owned(bytes), temp_id }) + } + } + + /// Create a new `Blob` from a vector of bytes. + #[inline] + pub fn from_bytes(bytes: Vec) -> Self { + let temp_id = NEXT_ID.fetch_add(1, Ordering::Relaxed); + Blob { id: OnceCell::new(), data: BlobData::Owned(bytes), temp_id } + } + + /// Create a new `Blob` with a pre-computed ID and owned data. 
+ #[inline] + pub fn new(id: BlobId, bytes: Vec) -> Self { + let temp_id = NEXT_ID.fetch_add(1, Ordering::Relaxed); + let cell = OnceCell::new(); + let _ = cell.set(id); + Blob { id: cell, data: BlobData::Owned(bytes), temp_id } + } + + /// Returns the blob's content as a byte slice. + #[inline] + pub fn bytes(&self) -> &[u8] { + self.data.as_ref() + } + + /// Lazily computes and returns the blob's content-based [`BlobId`]. + #[inline] + pub fn id(&self) -> BlobId { + *self.id.get_or_init(|| BlobId::new(self.bytes())) + } + + /// Returns a reference to the blob's [`BlobId`], computing it if necessary. + #[inline] + pub fn id_ref(&self) -> &BlobId { + self.id.get_or_init(|| BlobId::new(self.bytes())) + } + + /// Returns the temporary ID assigned when this blob was created. + #[inline] + pub fn temp_id(&self) -> u64 { + self.temp_id + } + + /// Returns the length of the blob in bytes. + #[inline] + pub fn len(&self) -> usize { + self.bytes().len() + } + + /// Returns true if the blob is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.bytes().is_empty() + } +} + +impl<'a> Blob<'a> { + /// Create a new `Blob` from borrowed bytes. + /// + /// This is useful for zero-copy scanning of data that already exists + /// in memory (e.g., from a git pack file). + #[inline] + pub fn from_borrowed(bytes: &'a [u8]) -> Self { + let temp_id = NEXT_ID.fetch_add(1, Ordering::Relaxed); + Blob { id: OnceCell::new(), data: BlobData::Borrowed(bytes), temp_id } + } +} + +impl Drop for Blob<'_> { + fn drop(&mut self) { + // For owned data, clear and shrink to free memory promptly. + if let BlobData::Owned(ref mut v) = self.data { + v.clear(); + v.shrink_to_fit(); + } + } +} + +/// A content-based identifier for a blob, computed as a Git-compatible SHA-1 hash. +#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Copy, Clone, Serialize)] +#[serde(into = "String")] +pub struct BlobId([u8; 20]); + +impl BlobId { + /// Creates a zero-filled (default) `BlobId`. 
+ pub fn default() -> Self { + BlobId([0; 20]) + } + + /// Computes a `BlobId` from raw bytes. + /// + /// For large inputs, only the first and last 64KB are hashed for performance. + #[inline] + pub fn new(input: &[u8]) -> Self { + const CHUNK: usize = 64 * 1024; // 64KB from start and end + let mut hasher = Sha1::new(); + write!(&mut hasher, "blob {}\0", input.len()).unwrap(); + if input.len() <= CHUNK * 2 { + hasher.update(input); + } else { + hasher.update(&input[..CHUNK]); + hasher.update(&input[input.len() - CHUNK..]); + } + let digest: [u8; 20] = hasher.finalize().into(); + BlobId(digest) + } + + /// Computes a `BlobId` from the complete bytes (no truncation). + pub fn compute_from_bytes(bytes: &[u8]) -> Self { + let mut hasher = Sha1::new(); + write!(&mut hasher, "blob {}\0", bytes.len()).unwrap(); + hasher.update(bytes); + let digest: [u8; 20] = hasher.finalize().into(); + BlobId(digest) + } + + /// Parses a `BlobId` from a hex string. + #[inline] + pub fn from_hex(v: &str) -> crate::Result { + let bytes = hex::decode(v)?; + let arr: [u8; 20] = + bytes.as_slice().try_into().map_err(|_| crate::Error::InvalidBlobId(v.to_string()))?; + Ok(BlobId(arr)) + } + + /// Returns the blob ID as a hex string. + #[inline] + pub fn hex(&self) -> String { + hex::encode(self.0) + } + + /// Returns the raw bytes of the blob ID. 
+ #[inline] + pub fn as_bytes(&self) -> &[u8] { + &self.0 + } +} + +impl<'de> Deserialize<'de> for BlobId { + fn deserialize>(d: D) -> std::result::Result { + struct Vis; + impl serde::de::Visitor<'_> for Vis { + type Value = BlobId; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("a 40-character hex string") + } + + fn visit_str( + self, + v: &str, + ) -> std::result::Result { + BlobId::from_hex(v).map_err(|e| serde::de::Error::custom(e)) + } + } + d.deserialize_str(Vis) + } +} + +impl std::fmt::Debug for BlobId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "BlobId({})", self.hex()) + } +} + +impl std::fmt::Display for BlobId { + #[inline] + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.hex()) + } +} + +impl JsonSchema for BlobId { + fn schema_name() -> String { + "BlobId".into() + } + + fn json_schema(gen: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema { + let s = String::json_schema(gen); + let mut o = s.into_object(); + o.string().pattern = Some("[0-9a-f]{40}".into()); + let md = o.metadata(); + md.description = Some("A hex-encoded blob ID as computed by Git".into()); + schemars::schema::Schema::Object(o) + } +} + +impl From for String { + #[inline] + fn from(blob_id: BlobId) -> String { + blob_id.hex() + } +} + +impl TryFrom<&str> for BlobId { + type Error = crate::Error; + + #[inline] + fn try_from(s: &str) -> std::result::Result { + BlobId::from_hex(s) + } +} + +impl<'a> From<&'a gix::ObjectId> for BlobId { + #[inline] + fn from(id: &'a gix::ObjectId) -> Self { + BlobId(id.as_bytes().try_into().expect("oid should be a 20-byte value")) + } +} + +impl From for BlobId { + #[inline] + fn from(id: gix::ObjectId) -> Self { + BlobId(id.as_bytes().try_into().expect("oid should be a 20-byte value")) + } +} + +impl<'a> From<&'a BlobId> for gix::ObjectId { + #[inline] + fn from(blob_id: &'a BlobId) -> Self { + 
gix::hash::ObjectId::try_from(blob_id.as_bytes()).unwrap() + } +} + +impl From for gix::ObjectId { + #[inline] + fn from(blob_id: BlobId) -> Self { + gix::hash::ObjectId::try_from(blob_id.as_bytes()).unwrap() + } +} + +/// A concurrent map with [`BlobId`] keys, optimized for low contention. +/// +/// This implementation uses 256 shards (based on the first byte of the blob ID) +/// to minimize lock contention during parallel scanning. +pub struct BlobIdMap { + maps: [Mutex>; 256], +} + +impl BlobIdMap { + /// Creates a new empty `BlobIdMap`. + pub fn new() -> Self { + BlobIdMap { maps: std::array::from_fn(|_| Mutex::new(FxHashMap::default())) } + } + + /// Inserts a value, returning the previous value if one existed. + #[inline] + pub fn insert(&self, blob_id: BlobId, v: V) -> Option { + let idx = blob_id.as_bytes()[0] as usize; + self.maps[idx].lock().insert(blob_id.into(), v) + } + + /// Returns true if the map contains the given key. + #[inline] + pub fn contains_key(&self, blob_id: &BlobId) -> bool { + let idx = blob_id.as_bytes()[0] as usize; + self.maps[idx].lock().contains_key(&ObjectId::from(blob_id)) + } + + /// Returns the total number of entries in the map. + /// + /// Note: This is not a cheap operation as it must lock all shards. + pub fn len(&self) -> usize { + self.maps.iter().map(|m| m.lock().len()).sum() + } + + /// Returns true if the map is empty. + pub fn is_empty(&self) -> bool { + self.maps.iter().all(|m| m.lock().is_empty()) + } +} + +impl BlobIdMap { + /// Gets a copy of the value for the given key. + #[inline] + pub fn get(&self, blob_id: &BlobId) -> Option { + let idx = blob_id.as_bytes()[0] as usize; + self.maps[idx].lock().get(&ObjectId::from(blob_id)).copied() + } +} + +impl Default for BlobIdMap { + fn default() -> Self { + Self::new() + } +} + +/// Metadata about a blob. +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, JsonSchema)] +pub struct BlobMetadata { + /// The blob's content-based ID. 
+ pub id: BlobId, + + /// The length of the blob in bytes. + pub num_bytes: usize, + + /// The guessed MIME type of the blob (e.g., "text/plain"). + pub mime_essence: Option, + + /// The guessed programming language of the blob (e.g., "Python"). + pub language: Option, +} + +impl BlobMetadata { + /// Returns the size in bytes. + #[inline] + pub fn num_bytes(&self) -> usize { + self.num_bytes + } + + /// Returns the size in megabytes, rounded to 3 decimal places. + #[inline] + pub fn num_megabytes(&self) -> f64 { + let mb = self.num_bytes as f64 / 1_048_576.0; + format!("{:.3}", mb).parse::().unwrap_or(mb) + } + + /// Returns the MIME essence if known. + #[inline] + pub fn mime_essence(&self) -> Option<&str> { + self.mime_essence.as_deref() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_blob_id_empty() { + assert_eq!(BlobId::new(&[]).hex(), "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"); + } + + #[test] + fn test_blob_id_small() { + assert_eq!(BlobId::new(&vec![0; 1024]).hex(), "06d7405020018ddf3cacee90fd4af10487da3d20"); + } + + #[test] + fn test_blob_from_bytes() { + let blob = Blob::from_bytes(b"hello world".to_vec()); + assert_eq!(blob.len(), 11); + assert_eq!(blob.bytes(), b"hello world"); + } + + #[test] + fn test_blob_id_roundtrip() { + let original = BlobId::new(b"test data"); + let hex = original.hex(); + let parsed = BlobId::from_hex(&hex).unwrap(); + assert_eq!(original, parsed); + } +} diff --git a/crates/kingfisher-core/src/bstring_escape.rs b/crates/kingfisher-core/src/bstring_escape.rs new file mode 100644 index 0000000..26e4c82 --- /dev/null +++ b/crates/kingfisher-core/src/bstring_escape.rs @@ -0,0 +1,128 @@ +//! Safe string escaping utilities. +//! +//! This module provides utilities for safely displaying byte strings that may +//! contain non-UTF8 data, ANSI escape codes, or control characters. 
/// Escapes non-printing characters in a string while preserving whitespace.
///
/// Control characters that are not whitespace are rewritten as `\u{..}`
/// escapes, with one exception: a bare ESC (`\x1B`) is dropped entirely.
/// Whitespace, including control whitespace such as `\n` and `\t`, passes
/// through unchanged.
///
/// Returns borrowed data if no escaping was needed, avoiding allocations.
fn escape_nonprinting(s: &str) -> Cow<'_, str> {
    let needs_escape = |ch: char| ch.is_control() && !ch.is_whitespace();

    // Fast path - return original if no control chars (except whitespace)
    if !s.chars().any(needs_escape) {
        return Cow::Borrowed(s);
    }

    // Allocate with extra capacity for possible escape sequences
    let mut escaped = String::with_capacity(s.len() * 2);

    for ch in s.chars() {
        match ch {
            // A lone ESC is removed rather than escaped. Only this single
            // character is skipped; whatever follows it is processed as
            // ordinary text.
            '\x1B' => {}
            // Escape non-whitespace control characters
            ch if needs_escape(ch) => escaped.extend(ch.escape_unicode()),
            // Pass through all other characters unchanged
            ch => escaped.push(ch),
        }
    }

    Cow::Owned(escaped)
}
Escape remaining control characters (except whitespace) +/// +/// # Examples +/// +/// ``` +/// use kingfisher_core::Escaped; +/// +/// let bytes = b"Hello\x00World"; +/// let escaped = Escaped(bytes); +/// assert_eq!(escaped.to_string(), "Hello\\u{0}World"); +/// ``` +#[derive(Debug, Clone, Copy)] +pub struct Escaped<'a>(pub &'a [u8]); + +impl Display for Escaped<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + // First handle UTF-8 decoding with replacement characters + let decoded = String::from_utf8_lossy(self.0); + // Then strip ANSI sequences and escape control chars + let stripped = strip_ansi_codes(&decoded); + let escaped = escape_nonprinting(&stripped); + f.write_str(&escaped) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_escape_normal_text() { + let input = "Hello, World!"; + let result = escape_nonprinting(input); + assert!(matches!(result, Cow::Borrowed(_))); + assert_eq!(result, "Hello, World!"); + } + + #[test] + fn test_escape_with_whitespace() { + let input = "Hello\n\t World!"; + let result = escape_nonprinting(input); + assert!(matches!(result, Cow::Borrowed(_))); + assert_eq!(result, "Hello\n\t World!"); + } + + #[test] + fn test_escape_control_chars() { + let input = "Hello\x00World\x01"; + let result = escape_nonprinting(input); + assert!(matches!(result, Cow::Owned(_))); + assert_eq!(result, "Hello\\u{0}World\\u{1}"); + } + + #[test] + fn test_escaped_struct_simple() { + let bytes = b"Hello World"; + let escaped = Escaped(bytes); + assert_eq!(escaped.to_string(), "Hello World"); + } + + #[test] + fn test_escaped_struct_ansi_codes() { + let bytes = b"\x1b[31mRed\x1b[0m \x1b[32mGreen\x1b[0m"; + let escaped = Escaped(bytes); + assert_eq!(escaped.to_string(), "Red Green"); + } + + #[test] + fn test_escaped_struct_invalid_utf8() { + let bytes = b"Hello\xFF\xFEWorld"; + let escaped = Escaped(bytes); + assert_eq!(escaped.to_string(), "Hello\u{FFFD}\u{FFFD}World"); + } + + #[test] + fn 
/// Calculates the Shannon entropy of a byte slice, in bits per byte.
///
/// The result is `0.0` for empty input and for input made of a single
/// repeated byte value, and grows towards `8.0` as the byte values become
/// more evenly spread over all 256 possibilities.
///
/// # Examples
///
/// ```
/// use kingfisher_core::calculate_shannon_entropy;
///
/// // Low entropy (repeated character)
/// let entropy = calculate_shannon_entropy(b"aaaaaaaaaa");
/// assert!(entropy < 0.1);
///
/// // High entropy (random-looking)
/// let entropy = calculate_shannon_entropy(b"j2k#9K$mL*p&vN3");
/// assert!(entropy > 3.5);
/// ```
pub fn calculate_shannon_entropy(bytes: &[u8]) -> f32 {
    if bytes.is_empty() {
        return 0.0;
    }

    // Histogram of byte values (0-255).
    let mut histogram = [0u32; 256];
    for &b in bytes {
        histogram[usize::from(b)] += 1;
    }

    let len = bytes.len() as f32;

    // Accumulate -p * log2(p) over every byte value that actually occurs.
    let mut entropy = 0.0_f32;
    for &occurrences in &histogram {
        if occurrences == 0 {
            continue;
        }
        let p = occurrences as f32 / len;
        entropy -= p * p.log2();
    }
    entropy
}
assert!(entropy.is_finite()); + } + + #[test] + fn test_entropy_two_values() { + // Even distribution of two bytes should be exactly 1.0 + let input = &[1, 2, 1, 2]; + let entropy = calculate_shannon_entropy(input); + assert!((entropy - 1.0).abs() < 0.0001); + assert!(entropy.is_finite()); + } + + #[test] + fn test_entropy_password() { + // Real password example should have mid-range entropy + let password = "Password123!".as_bytes(); + let entropy = calculate_shannon_entropy(password); + assert!(entropy > 2.5); + assert!(entropy.is_finite()); + } + + #[test] + fn test_entropy_random() { + // Random-looking string should have high entropy + let random = "j2k#9K$mL*p&vN3".as_bytes(); + let entropy = calculate_shannon_entropy(random); + assert!(entropy > 3.5); + assert!(entropy.is_finite()); + } +} diff --git a/crates/kingfisher-core/src/error.rs b/crates/kingfisher-core/src/error.rs new file mode 100644 index 0000000..b288d3c --- /dev/null +++ b/crates/kingfisher-core/src/error.rs @@ -0,0 +1,36 @@ +//! Error types for kingfisher-core. + +use thiserror::Error; + +/// The primary error type for kingfisher-core operations. +#[derive(Error, Debug)] +pub enum Error { + /// An I/O error occurred. + #[error("I/O error: {0}")] + Io(#[from] std::io::Error), + + /// Failed to parse a blob ID from hex. + #[error("Invalid blob ID: {0}")] + InvalidBlobId(String), + + /// A hex decoding error occurred. + #[error("Hex decode error: {0}")] + HexDecode(#[from] hex::FromHexError), + + /// Failed to open or read a Git repository. + #[error("Git error: {0}")] + Git(String), + + /// A generic error with a message. + #[error("{0}")] + Other(String), +} + +impl From for Error { + fn from(e: gix::open::Error) -> Self { + Error::Git(e.to_string()) + } +} + +/// A specialized Result type for kingfisher-core operations. 
+pub type Result = std::result::Result; diff --git a/crates/kingfisher-core/src/git_commit_metadata.rs b/crates/kingfisher-core/src/git_commit_metadata.rs new file mode 100644 index 0000000..4268d5d --- /dev/null +++ b/crates/kingfisher-core/src/git_commit_metadata.rs @@ -0,0 +1,159 @@ +//! Git commit metadata types. +//! +//! This module provides types for tracking commit information associated +//! with blobs found in git history. + +use gix::{date::Time, ObjectId}; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +// Wrapper for serializing gix::date::Time as text +#[repr(transparent)] +#[derive(Serialize, Deserialize, Copy, Clone)] +#[serde(remote = "Time")] +struct TextTime( + #[serde( + getter = "text_time::getter", + serialize_with = "text_time::serialize", + deserialize_with = "text_time::deserialize" + )] + Time, +); + +impl From for Time { + fn from(v: TextTime) -> Self { + v.0 + } +} + +impl From