blumeops/mise-tasks/docs-check-links-hs
Erich Blume 4625d44fd7 Add Haskell port of docs-check-links via stack script
Experiment with Haskell single-file scripting using stack's script
runner, comparable to the existing uv run --script Python pattern.
Adds stack 3.9.3 to mise.toml for runtime management.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-05 11:52:26 -07:00

265 lines
10 KiB
Text
Executable file

#!/usr/bin/env stack
-- stack script --resolver lts-23.18 --package directory --package filepath --package containers
--MISE description="Validate all wiki-links point to existing doc files (Haskell)"
--MISE alias="dclhs"
{-
Haskell port of docs-check-links. Validates that all wiki-links in
documentation point to existing files.
Wiki-link formats supported:
- [[filename]] resolves by stem (errors if ambiguous)
- [[path/to/file]] resolves by relative path from docs root
- [[target|Display Text]] either form with display text
- [[target#Heading]] with anchor fragment (file part validated)
Resolution mirrors Quartz's "shortest" markdownLinkResolution:
bare names resolve when unique; use paths to disambiguate duplicates.
Usage: mise run docs-check-links-hs
-}
{-# LANGUAGE OverloadedStrings #-}
module Main where
import Data.List (intercalate, isPrefixOf, sort)
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import System.Directory (doesFileExist, listDirectory, doesDirectoryExist)
import System.Exit (exitFailure, exitSuccess)
import System.FilePath ((</>), takeBaseName, takeExtension, makeRelative, dropExtension)
-- | Docs directory, relative to script location (mise-tasks/../docs)
docsDir :: FilePath
docsDir = "docs"
-- | Recursively list all files under a directory
listFilesRecursive :: FilePath -> IO [FilePath]
listFilesRecursive dir = do
exists <- doesDirectoryExist dir
if not exists
then return []
else do
entries <- listDirectory dir
let fullPaths = map (dir </>) entries
files <- mapM classify fullPaths
return (concat files)
where
classify path = do
isDir <- doesDirectoryExist path
if isDir
then listFilesRecursive path
else return [path]
-- | Check if a path component is in the file path
pathContains :: String -> FilePath -> Bool
pathContains component path = ("/" ++ component ++ "/") `isInfixOf'` ("/" ++ path ++ "/")
where
isInfixOf' needle haystack = any (isPrefixOf needle) (tails' haystack)
tails' [] = [[]]
tails' xs@(_:xs') = xs : tails' xs'
-- | Extract wiki-links from a line, ignoring inline code
-- Returns list of (target, hasSpaces)
extractWikilinksFromLine :: String -> [(String, Bool)]
extractWikilinksFromLine line =
let lineWithoutCode = removeInlineCode line
in concatMap parseMatch (findAllWikilinks lineWithoutCode)
-- | Remove inline code (backtick sections) from a line
removeInlineCode :: String -> String
removeInlineCode [] = []
removeInlineCode ('`':rest) =
case break (== '`') rest of
(_, []) -> [] -- unclosed backtick, drop rest
(_, _:after) -> removeInlineCode after
removeInlineCode (c:rest) = c : removeInlineCode rest
-- | Find all wiki-link contents from a string
findAllWikilinks :: String -> [String]
findAllWikilinks [] = []
findAllWikilinks ('[':'[':rest) =
case break (== ']') rest of
(content, ']':']':after) -> content : findAllWikilinks after
(_, after) -> findAllWikilinks after
findAllWikilinks (_:rest) = findAllWikilinks rest
-- | Parse a wiki-link content string into (target, hasSpaces)
parseMatch :: String -> [(String, Bool)]
parseMatch content =
let (rawTarget, pipe) = case break (== '|') content of
(t, '|':_display) -> (t, True)
(t, _) -> (t, False)
target = strip rawTarget
hasSpaces = rawTarget /= target
|| (pipe && (last' rawTarget == ' ' || head' (drop 1 (dropWhile (/= '|') content)) == ' '))
in [(target, hasSpaces)]
where
last' [] = '\0'
last' xs = last xs
head' [] = '\0'
head' (x:_) = x
-- | Strip leading and trailing whitespace
strip :: String -> String
strip = reverse . dropWhile (== ' ') . reverse . dropWhile (== ' ')
-- | Extract wiki-links from a file with line numbers
-- Returns list of (target, lineNum, hasSpaces)
extractWikilinks :: FilePath -> IO [(String, Int, Bool)]
extractWikilinks path = do
content <- readFile path
let linesWithNums = zip [1..] (lines content)
return
[ (target, lineNum, hasSpaces)
| (lineNum, line) <- linesWithNums
, (target, hasSpaces) <- extractWikilinksFromLine line
]
data LinkError
= Broken FilePath Int String
| Ambiguous FilePath Int String [String]
| Spaced FilePath Int String
deriving (Show)
main :: IO ()
main = do
-- Build lookup structures
allFiles <- listFilesRecursive docsDir
let mdFiles = filter (\f -> takeExtension f == ".md" && not (pathContains "changelog.d" f)) allFiles
-- path_targets: relative paths without extension
let pathTargets = Set.fromList
[ dropExtension (makeRelative docsDir f)
| f <- mdFiles
]
-- stem_to_paths: map from stem to list of relative paths
let stemToPaths = foldl (\m f ->
let stem = takeBaseName f
relPath = dropExtension (makeRelative docsDir f)
in Map.insertWith (++) stem [relPath] m
) Map.empty mdFiles
-- Special case: build-time docs from repo root
let repoRoot = "."
changelogExists <- doesFileExist (repoRoot </> "CHANGELOG.md")
let (pathTargets', stemToPaths') =
if changelogExists
then ( Set.insert "CHANGELOG" pathTargets
, Map.insertWith (++) "CHANGELOG" ["CHANGELOG"] stemToPaths
)
else (pathTargets, stemToPaths)
-- Collect errors and linked stems
results <- mapM (\f -> do
let relPath = makeRelative docsDir f
let sourceStem = takeBaseName f
links <- extractWikilinks f
return (relPath, sourceStem, links)
) (sort mdFiles)
let allDocStems = Set.fromList (Map.keys stemToPaths')
let orphanExceptions = Set.fromList ["index"]
let (errors, linkedStems) = foldl (\(errs, linked) (relPath, sourceStem, links) ->
foldl (\(errs', linked') (target, lineNum, hasSpaces) ->
if hasSpaces
then (Spaced relPath lineNum target : errs', linked')
else
let fileTarget = case break (== '#') target of
(ft, '#':_) -> if null ft then "" else ft
(ft, _) -> ft
in if null fileTarget
then (errs', linked') -- pure in-page anchor
else if '/' `elem` fileTarget
then -- path-based link
if Set.member fileTarget pathTargets'
then let stem = last (splitOn '/' fileTarget)
in (errs', if stem /= sourceStem then Set.insert stem linked' else linked')
else (Broken relPath lineNum target : errs', linked')
else -- bare stem link
case Map.lookup fileTarget stemToPaths' of
Nothing -> (Broken relPath lineNum target : errs', linked')
Just paths
| length paths > 1 ->
(Ambiguous relPath lineNum target paths : errs', linked')
| fileTarget /= sourceStem ->
(errs', Set.insert fileTarget linked')
| otherwise -> (errs', linked')
) (errs, linked) links
) ([], Set.empty) results
-- Orphan detection
let orphanStems = sort $ Set.toList (allDocStems `Set.difference` linkedStems `Set.difference` orphanExceptions)
-- Print results
putStrLn "Wiki-Link Validation"
putStrLn ""
putStrLn $ "Found " ++ show (Set.size pathTargets') ++ " valid link targets in documentation."
putStrLn ""
let spacedErrs = [e | e@(Spaced _ _ _) <- reverse errors]
let brokenErrs = [e | e@(Broken _ _ _) <- reverse errors]
let ambiguousErrs = [e | e@(Ambiguous _ _ _ _) <- reverse errors]
let hasErrors = not (null errors) || not (null orphanStems)
if not (null spacedErrs) then do
putStrLn "ERROR: Wiki-Links With Spaces Found"
putStrLn "Wiki-links must not have spaces in the target or around the pipe."
putStrLn "Use [[target|Display Text]] not [[target | Display Text]]."
putStrLn ""
mapM_ (\(Spaced f l t) ->
putStrLn $ " " ++ f ++ ":" ++ show l ++ " [[" ++ t ++ "]]"
) spacedErrs
putStrLn ""
else return ()
if not (null ambiguousErrs) then do
putStrLn "ERROR: Ambiguous Wiki-Links Found"
putStrLn "These bare-name links match multiple files."
putStrLn "Use a path-based link to disambiguate: [[path/to/file]]"
putStrLn ""
mapM_ (\(Ambiguous f l t paths) ->
putStrLn $ " " ++ f ++ ":" ++ show l ++ " [[" ++ t ++ "]] -> " ++ intercalate ", " paths
) ambiguousErrs
putStrLn ""
else return ()
if not (null brokenErrs) then do
putStrLn "ERROR: Broken Wiki-Links Found"
putStrLn ""
mapM_ (\(Broken f l t) ->
putStrLn $ " " ++ f ++ ":" ++ show l ++ " [[" ++ t ++ "]]"
) brokenErrs
putStrLn ""
putStrLn "Each wiki-link target must match a filename stem or path in docs/."
putStrLn ""
else return ()
if not (null orphanStems) then do
putStrLn "ERROR: Orphan Documents Found"
putStrLn "These docs are not linked from any other document."
putStrLn ""
mapM_ (\stem ->
case Map.lookup stem stemToPaths' of
Just paths -> mapM_ (\p -> putStrLn $ " " ++ p ++ ".md (" ++ stem ++ ")") paths
Nothing -> return ()
) orphanStems
putStrLn ""
else return ()
if hasErrors
then exitFailure
else do
putStrLn "All wiki-links are valid!"
exitSuccess
-- | Split a string on a character
splitOn :: Char -> String -> [String]
splitOn _ [] = [""]
splitOn sep s =
let (first, rest) = break (== sep) s
in first : case rest of
[] -> []
(_:rs) -> splitOn sep rs