diff --git a/docs/changelog.d/haskell-docs-check-links.misc.md b/docs/changelog.d/haskell-docs-check-links.misc.md new file mode 100644 index 0000000..74a7eb1 --- /dev/null +++ b/docs/changelog.d/haskell-docs-check-links.misc.md @@ -0,0 +1 @@ +Add Haskell port of docs-check-links as an experiment in `stack script` single-file tooling. diff --git a/mise-tasks/docs-check-links-hs b/mise-tasks/docs-check-links-hs new file mode 100755 index 0000000..c04721b --- /dev/null +++ b/mise-tasks/docs-check-links-hs @@ -0,0 +1,265 @@ +#!/usr/bin/env stack +-- stack script --resolver lts-23.18 --package directory --package filepath --package containers +--MISE description="Validate all wiki-links point to existing doc files (Haskell)" +--MISE alias="dclhs" + +{- +Haskell port of docs-check-links. Validates that all wiki-links in +documentation point to existing files. + +Wiki-link formats supported: +- [[filename]] resolves by stem (errors if ambiguous) +- [[path/to/file]] resolves by relative path from docs root +- [[target|Display Text]] either form with display text +- [[target#Heading]] with anchor fragment (file part validated) + +Resolution mirrors Quartz's "shortest" markdownLinkResolution: +bare names resolve when unique; use paths to disambiguate duplicates. + +Usage: mise run docs-check-links-hs +-} + +{-# LANGUAGE OverloadedStrings #-} + +module Main where + +import Data.List (intercalate, isPrefixOf, sort) +import qualified Data.Map.Strict as Map +import qualified Data.Set as Set +import System.Directory (doesFileExist, listDirectory, doesDirectoryExist) +import System.Exit (exitFailure, exitSuccess) +import System.FilePath ((), takeBaseName, takeExtension, makeRelative, dropExtension) +-- | Docs directory, relative to script location (mise-tasks/../docs) +docsDir :: FilePath +docsDir = "docs" + +-- | Recursively list all files under a directory +listFilesRecursive :: FilePath -> IO [FilePath] +listFilesRecursive dir = do + exists <- doesDirectoryExist dir + if not exists + then return [] + else do + entries <- listDirectory dir + let fullPaths = map (dir ) entries + files <- mapM classify fullPaths + return (concat files) + where + classify path = do + isDir <- doesDirectoryExist path + if isDir + then listFilesRecursive path + else return [path] + +-- | Check if a path component is in the file path +pathContains :: String -> FilePath -> Bool +pathContains component path = ("/" ++ component ++ "/") `isInfixOf'` ("/" ++ path ++ "/") + where + isInfixOf' needle haystack = any (isPrefixOf needle) (tails' haystack) + tails' [] = [[]] + tails' xs@(_:xs') = xs : tails' xs' + +-- | Extract wiki-links from a line, ignoring inline code +-- Returns list of (target, hasSpaces) +extractWikilinksFromLine :: String -> [(String, Bool)] +extractWikilinksFromLine line = + let lineWithoutCode = removeInlineCode line + in concatMap parseMatch (findAllWikilinks lineWithoutCode) + +-- | Remove inline code (backtick sections) from a line +removeInlineCode :: String -> String +removeInlineCode [] = [] +removeInlineCode ('`':rest) = + case break (== '`') rest of + (_, []) -> [] -- unclosed backtick, drop rest + (_, _:after) -> removeInlineCode after +removeInlineCode (c:rest) = c : removeInlineCode rest + +-- | Find all wiki-link contents from a string +findAllWikilinks :: String -> [String] +findAllWikilinks [] = [] +findAllWikilinks ('[':'[':rest) = + case break (== ']') rest of + (content, ']':']':after) -> content : findAllWikilinks after + (_, after) -> findAllWikilinks after +findAllWikilinks (_:rest) = findAllWikilinks rest + +-- | Parse a wiki-link content string into (target, hasSpaces) +parseMatch :: String -> [(String, Bool)] +parseMatch content = + let (rawTarget, pipe) = case break (== '|') content of + (t, '|':_display) -> (t, True) + (t, _) -> (t, False) + target = strip rawTarget + hasSpaces = rawTarget /= target + || (pipe && (last' rawTarget == ' ' || head' (drop 1 (dropWhile (/= '|') content)) == ' ')) + in [(target, hasSpaces)] + where + last' [] = '\0' + last' xs = last xs + head' [] = '\0' + head' (x:_) = x + +-- | Strip leading and trailing whitespace +strip :: String -> String +strip = reverse . dropWhile (== ' ') . reverse . dropWhile (== ' ') + +-- | Extract wiki-links from a file with line numbers +-- Returns list of (target, lineNum, hasSpaces) +extractWikilinks :: FilePath -> IO [(String, Int, Bool)] +extractWikilinks path = do + content <- readFile path + let linesWithNums = zip [1..] (lines content) + return + [ (target, lineNum, hasSpaces) + | (lineNum, line) <- linesWithNums + , (target, hasSpaces) <- extractWikilinksFromLine line + ] + +data LinkError + = Broken FilePath Int String + | Ambiguous FilePath Int String [String] + | Spaced FilePath Int String + deriving (Show) + +main :: IO () +main = do + -- Build lookup structures + allFiles <- listFilesRecursive docsDir + let mdFiles = filter (\f -> takeExtension f == ".md" && not (pathContains "changelog.d" f)) allFiles + + -- path_targets: relative paths without extension + let pathTargets = Set.fromList + [ dropExtension (makeRelative docsDir f) + | f <- mdFiles + ] + + -- stem_to_paths: map from stem to list of relative paths + let stemToPaths = foldl (\m f -> + let stem = takeBaseName f + relPath = dropExtension (makeRelative docsDir f) + in Map.insertWith (++) stem [relPath] m + ) Map.empty mdFiles + + -- Special case: build-time docs from repo root + let repoRoot = "." + changelogExists <- doesFileExist (repoRoot "CHANGELOG.md") + let (pathTargets', stemToPaths') = + if changelogExists + then ( Set.insert "CHANGELOG" pathTargets + , Map.insertWith (++) "CHANGELOG" ["CHANGELOG"] stemToPaths + ) + else (pathTargets, stemToPaths) + + -- Collect errors and linked stems + results <- mapM (\f -> do + let relPath = makeRelative docsDir f + let sourceStem = takeBaseName f + links <- extractWikilinks f + return (relPath, sourceStem, links) + ) (sort mdFiles) + + let allDocStems = Set.fromList (Map.keys stemToPaths') + let orphanExceptions = Set.fromList ["index"] + + let (errors, linkedStems) = foldl (\(errs, linked) (relPath, sourceStem, links) -> + foldl (\(errs', linked') (target, lineNum, hasSpaces) -> + if hasSpaces + then (Spaced relPath lineNum target : errs', linked') + else + let fileTarget = case break (== '#') target of + (ft, '#':_) -> if null ft then "" else ft + (ft, _) -> ft + in if null fileTarget + then (errs', linked') -- pure in-page anchor + else if '/' `elem` fileTarget + then -- path-based link + if Set.member fileTarget pathTargets' + then let stem = last (splitOn '/' fileTarget) + in (errs', if stem /= sourceStem then Set.insert stem linked' else linked') + else (Broken relPath lineNum target : errs', linked') + else -- bare stem link + case Map.lookup fileTarget stemToPaths' of + Nothing -> (Broken relPath lineNum target : errs', linked') + Just paths + | length paths > 1 -> + (Ambiguous relPath lineNum target paths : errs', linked') + | fileTarget /= sourceStem -> + (errs', Set.insert fileTarget linked') + | otherwise -> (errs', linked') + ) (errs, linked) links + ) ([], Set.empty) results + + -- Orphan detection + let orphanStems = sort $ Set.toList (allDocStems `Set.difference` linkedStems `Set.difference` orphanExceptions) + + -- Print results + putStrLn "Wiki-Link Validation" + putStrLn "" + putStrLn $ "Found " ++ show (Set.size pathTargets') ++ " valid link targets in documentation." + putStrLn "" + + let spacedErrs = [e | e@(Spaced _ _ _) <- reverse errors] + let brokenErrs = [e | e@(Broken _ _ _) <- reverse errors] + let ambiguousErrs = [e | e@(Ambiguous _ _ _ _) <- reverse errors] + let hasErrors = not (null errors) || not (null orphanStems) + + if not (null spacedErrs) then do + putStrLn "ERROR: Wiki-Links With Spaces Found" + putStrLn "Wiki-links must not have spaces in the target or around the pipe." + putStrLn "Use [[target|Display Text]] not [[target | Display Text]]." + putStrLn "" + mapM_ (\(Spaced f l t) -> + putStrLn $ " " ++ f ++ ":" ++ show l ++ " [[" ++ t ++ "]]" + ) spacedErrs + putStrLn "" + else return () + + if not (null ambiguousErrs) then do + putStrLn "ERROR: Ambiguous Wiki-Links Found" + putStrLn "These bare-name links match multiple files." + putStrLn "Use a path-based link to disambiguate: [[path/to/file]]" + putStrLn "" + mapM_ (\(Ambiguous f l t paths) -> + putStrLn $ " " ++ f ++ ":" ++ show l ++ " [[" ++ t ++ "]] -> " ++ intercalate ", " paths + ) ambiguousErrs + putStrLn "" + else return () + + if not (null brokenErrs) then do + putStrLn "ERROR: Broken Wiki-Links Found" + putStrLn "" + mapM_ (\(Broken f l t) -> + putStrLn $ " " ++ f ++ ":" ++ show l ++ " [[" ++ t ++ "]]" + ) brokenErrs + putStrLn "" + putStrLn "Each wiki-link target must match a filename stem or path in docs/." + putStrLn "" + else return () + + if not (null orphanStems) then do + putStrLn "ERROR: Orphan Documents Found" + putStrLn "These docs are not linked from any other document." + putStrLn "" + mapM_ (\stem -> + case Map.lookup stem stemToPaths' of + Just paths -> mapM_ (\p -> putStrLn $ " " ++ p ++ ".md (" ++ stem ++ ")") paths + Nothing -> return () + ) orphanStems + putStrLn "" + else return () + + if hasErrors + then exitFailure + else do + putStrLn "All wiki-links are valid!" + exitSuccess + +-- | Split a string on a character +splitOn :: Char -> String -> [String] +splitOn _ [] = [""] +splitOn sep s = + let (first, rest) = break (== sep) s + in first : case rest of + [] -> [] + (_:rs) -> splitOn sep rs diff --git a/mise.toml b/mise.toml index 5bb2829..f86a6b9 100644 --- a/mise.toml +++ b/mise.toml @@ -3,3 +3,4 @@ prek = "latest" pulumi = "latest" dagger = "0.20.1" +stack = "3.9.3"