2026-04-05 10:38:20 -07:00
#!/usr/bin/env python3
"""
Reads all YAML rule definition files from crates / kingfisher - rules / data / rules /
and generates a searchable markdown page listing all built - in rules .
"""
2026-04-05 11:31:03 -07:00
from html import escape
2026-04-05 10:38:20 -07:00
from pathlib import Path
2026-04-05 11:31:03 -07:00
import yaml
2026-04-05 10:38:20 -07:00
# Repository root, reached by walking three `.parent` hops up from this file.
REPO_ROOT = Path(__file__).resolve().parent.parent.parent
# Directory scanned for built-in rule definitions (`*.yml` files).
RULES_DIR = REPO_ROOT / "crates" / "kingfisher-rules" / "data" / "rules"
# Destination of the generated Markdown page.
OUTPUT = REPO_ROOT / "docs-site" / "docs" / "rules" / "builtin-rules.md"
def load_rules():
    """Load every built-in rule from the YAML files in ``RULES_DIR``.

    Uses ``yaml.safe_load_all`` to handle multi-document YAML files, and looks
    for rules under a ``rules`` key in each document, matching the canonical
    counting logic in data/default/rule_cleanup/count_rules.py.

    Files that cannot be read or parsed are reported with a warning and
    skipped; documents or entries that do not have the expected shape are
    ignored silently.

    Returns:
        A list of dicts, one per rule, with the keys ``provider``, ``name``,
        ``id``, ``confidence`` (all strings) and ``validates``, ``revokes``,
        ``dependent`` (all booleans).
    """
    all_rules = []
    for yml_file in sorted(RULES_DIR.glob("*.yml")):
        # The provider label is derived from the file name, e.g.
        # "some_provider.yml" -> "Some Provider".
        provider = yml_file.stem.replace("_", " ").replace("-", " ").title()
        try:
            with open(yml_file, "r", encoding="utf-8") as f:
                # safe_load_all parses lazily from the stream, so the result
                # must be materialised while the file is still open.
                documents = list(yaml.safe_load_all(f))
        except Exception as e:
            # Deliberately broad: one unreadable or malformed file must not
            # abort generation of the whole page.
            print(f"WARNING: Failed to parse {yml_file.name}: {e}")
            continue

        for document in documents:
            if not isinstance(document, dict):
                continue
            rules = document.get("rules", [])
            if not isinstance(rules, list):
                continue
            all_rules.extend(
                _summarize_rule(rule, provider)
                for rule in rules
                if isinstance(rule, dict)
            )
    return all_rules


def _summarize_rule(rule, provider):
    """Reduce one raw rule mapping to the fields shown on the rules page."""
    return {
        "provider": provider,
        # `or` (not a .get default) so an explicit YAML null also falls back;
        # str() so a non-string scalar cannot break escaping or sorting later.
        "name": str(rule.get("name") or "Unknown"),
        "id": str(rule.get("id") or ""),
        "confidence": str(rule.get("confidence") or "medium"),
        # Presence of the key is what counts, not its contents.
        "validates": "validation" in rule,
        "revokes": "revocation" in rule,
        "dependent": bool(rule.get("depends_on_rule")),
    }
def generate_markdown(rules):
    """Render the built-in rules page as a single Markdown string.

    Args:
        rules: List of rule dicts, each with the keys ``provider``, ``name``,
            ``id``, ``confidence``, ``validates``, ``revokes`` and
            ``dependent``.

    Returns:
        The page text: YAML front matter, a summary paragraph, a search box
        and an HTML table with one row per rule (sorted by provider, then
        rule ID), terminated by a trailing newline.
    """
    total = len(rules)
    detectors = sum(1 for r in rules if not r["dependent"])
    dependent = total - detectors
    validated = sum(1 for r in rules if r["validates"])
    revocable = sum(1 for r in rules if r["revokes"])
    providers = len({r["provider"] for r in rules})

    lines = [
        # The front-matter fences must start in column 0 to be recognised.
        "---",
        'title: "Built-in Rules List"',
        (
            f'description: "Complete list of all {total} built-in secret detection '
            "rules in Kingfisher. Searchable and filterable by provider, confidence "
            'level, and validation support."'
        ),
        "---",
        "",
        "# Built-in Rules",
        "",
        f"Kingfisher ships with **{total} detection rules** across **{providers} providers**",
        f"({detectors} detectors + {dependent} dependent rules).",
        f"Of these, **{validated}** include live validation and **{revocable}** support direct revocation.",
        "",
        '!!! tip "Search"',
        # Admonition content has to be indented four spaces to stay inside the tip.
        "    Use the search box below to filter rules by provider name, rule ID, or confidence level.",
        "",
        '<input type="text" class="rules-search" placeholder="Search rules... (e.g. github, aws, anthropic)" />',
        '<div class="rules-count"></div>',
        "",
        '<table class="rules-table">',
        "<thead>",
        "<tr>",
        "<th>Provider</th>",
        "<th>Rule Name</th>",
        "<th>Rule ID</th>",
        "<th>Confidence</th>",
        "<th>Validates</th>",
        "<th>Revokes</th>",
        "</tr>",
        "</thead>",
        "<tbody>",
    ]

    def sort_key(rule):
        # Case-insensitive by provider, then by rule ID; str() keeps the sort
        # from failing if a non-string value slipped in.
        return (str(rule["provider"]).lower(), str(rule["id"]))

    for rule in sorted(rules, key=sort_key):
        rule_id = escape(str(rule["id"]))
        cells = [
            escape(str(rule["provider"])),
            escape(str(rule["name"])),
            f"<code>{rule_id}</code>",
            escape(str(rule["confidence"]).capitalize()),
            "Yes" if rule["validates"] else "",
            "Yes" if rule["revokes"] else "",
        ]
        lines.append("<tr>")
        lines.extend(f"<td>{cell}</td>" for cell in cells)
        lines.append("</tr>")

    lines.extend([
        "</tbody>",
        "</table>",
    ])
    return "\n".join(lines) + "\n"
def main():
    """Regenerate the built-in rules page and write it to ``OUTPUT``.

    Loads the rules, renders the Markdown, creates the output directory if it
    is missing, and prints short progress messages along the way.
    """
    print("Generating built-in rules page...")
    rules = load_rules()
    print(f"Found {len(rules)} rules")

    content = generate_markdown(rules)

    OUTPUT.parent.mkdir(parents=True, exist_ok=True)
    # newline="\n" keeps the generated file byte-identical across platforms
    # (no CRLF translation on Windows), which avoids spurious diffs.
    with open(OUTPUT, "w", encoding="utf-8", newline="\n") as f:
        f.write(content)

    print(f"Written to {OUTPUT.relative_to(REPO_ROOT)}")
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()