{- This file is part of link-relations. - - Written in 2015 by fr33domlover . - - ♡ Copying is an act of love. Please copy, reuse and share. - - The author(s) have dedicated all copyright and related and neighboring - rights to this software to the public domain worldwide. This software is - distributed without any warranty. - - You should have received a copy of the CC0 Public Domain Dedication along - with this software. If not, see - . -} {-# LANGUAGE OverloadedStrings #-} module Main where import Control.Applicative import Control.Monad (guard, liftM) import Data.Char (isDigit, toUpper) import Data.Csv import Data.Foldable (mapM_) import Data.Int (Int64) import Data.List (intercalate) import Data.Maybe (fromMaybe) import Data.Monoid ((<>), mconcat) import Prelude hiding (mapM_) import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BLC import qualified Data.Vector as V (length, toList) ------------------------------------------------------------------------------- -- Configuration ------------------------------------------------------------------------------- -- | The IANA link relations registry data. csvFile :: FilePath csvFile = "util/link-relations-1.csv" -- | Haskell source template into which the data needs to be inserted. hsTemplateFile :: FilePath hsTemplateFile = "util/template.hs" -- | Where to write the result of writing formatted data into the template. outputFile :: FilePath outputFile = "src/Web/LinkRelations.hs.new" -- | Y-M-D date specifying the time of the last update to the IANA registry. lastUpdated :: BL.ByteString lastUpdated = "2016-01-22" ------------------------------------------------------------------------------- -- Types ------------------------------------------------------------------------------- data LinkRelInfo = LinkRelInfo { lrName :: BL.ByteString , lrDescription :: BL.ByteString , lrReferences :: BL.ByteString , lrNotes :: BL.ByteString } deriving (Show) instance FromRecord LinkRelInfo where parseRecord v = if V.length v == 4 then LinkRelInfo <$> v .! 0 <*> v .! 1 <*> v .! 2 <*> v .! 3 else empty data TemplateLine = TemplText BL.ByteString | TemplVar BL.ByteString ------------------------------------------------------------------------------- -- Utilities ------------------------------------------------------------------------------- -- | Like 'map', but applies one function to the first element and a different -- function to the rest of the elements. mapSep :: (a -> b) -> (a -> b) -> [a] -> [b] mapSep _ _ [] = [] mapSep f g (x:xs) = f x : map g xs -- | Like 'BLC.map', but applies the function /only/ to the first byte. mapFstB :: (Char -> Char) -> BL.ByteString -> BL.ByteString mapFstB f s = case BLC.uncons s of Nothing -> s Just (c, r) -> f c `BLC.cons` r --mapTail :: (a -> a) -> [a] -> [a] --mapTail _ [] = [] --mapTail f (x:xs) = x : map f xs -- | Convert @this-is-some-name@ to @ThisIsSomeName@. toCamel :: BL.ByteString -> BL.ByteString toCamel = mconcat . map (mapFstB toUpper) . BLC.split '-' -- | Apply a function to the first element. applyFst :: (a -> c) -> (a, b) -> (c, b) applyFst f (x, y) = (f x, y) -- | Break a list of words into two lists. The first is the maximal prefix -- whose length (including spaces between words) is at most N (unless the first -- word is longer than N, in which case that word is returned), and the second -- list is the rest of the word list. breakN :: Int64 -> [BL.ByteString] -> ([BL.ByteString], [BL.ByteString]) breakN len ws = applyFst reverse $ f 0 [] ws where f _ xs [] = (xs, []) f n [] (y:ys) = let l = n + BL.length y -- 'n' should be 0 here in if len <= l then ([y], ys) else f l [y] ys f n xs r@(y:ys) = let l = n + 1 + BL.length y in if len < l then (xs, r) else f l (y:xs) ys -- | Apply 'breakN' repeatedly on a word list, splitting it into a list of -- word-lists. splitN :: Int64 -> [BL.ByteString] -> [[BL.ByteString]] splitN len = f where f ws = case breakN len ws of (xs, []) -> [xs] (xs, ys) -> xs : f ys -- | Read a text file into a list of lines. readLines :: FilePath -> IO [BL.ByteString] readLines = liftM BLC.lines . BL.readFile ------------------------------------------------------------------------------- -- Haddock Comments ------------------------------------------------------------------------------- -- | Try to detect a link by suffix. Return remaining prefix and a suffix to -- append after link conversion. stripLinkSuffix :: BL.ByteString -> Maybe (BL.ByteString, BL.ByteString) stripLinkSuffix w = case BLC.unsnoc w of Just (t, ']') -> Just (t, BL.empty) Just (t, ',') -> case BLC.unsnoc t of Just (r, ']') -> Just (r, BLC.singleton ',') _ -> Nothing _ -> Nothing -- | If the given word is a link, convert to haddock format. detectLink :: BL.ByteString -> Maybe BL.ByteString detectLink w = let rfcPrefix = "[RFC" rfcDrop = 4 httpPrefix = "[http" httpDrop = 1 otherPrefix = "[" otherDrop = 1 iana = " Just $ let name = BL.drop rfcDrop t pref = if BLC.all isDigit name then "rfc" else "draft" in iana <> pref <> name <> " RFC " <> name <> ">" <> r | httpPrefix `BL.isPrefixOf` t -> Just $ "<" <> BL.drop httpDrop t <> ">" <> r | otherPrefix `BL.isPrefixOf` t -> Just $ let name = BL.drop otherDrop t in iana <> name <> " " <> name <> ">" <> r | otherwise -> Nothing Nothing -> Nothing -- | Escape some formatting characters in a non-link word. escapeWord :: BL.ByteString -> BL.ByteString escapeWord = BLC.foldr f BL.empty where f c s = if c `elem` ['/', '"', '\''] then '\\' `BLC.cons` c `BLC.cons` s else c `BLC.cons` s -- | Convert links, espcape characters, do some comment text processing. processWords :: [BL.ByteString] -> [BL.ByteString] processWords = map $ \ s -> fromMaybe (escapeWord s) $ detectLink s -- | Format a paragraph as haddock identifier comment lines. toComment :: BL.ByteString -> [BL.ByteString] toComment s = let mkFirst = ("-- | " <>) . BLC.unwords mkRest = ("-- " <>) . BLC.unwords in case breakN 74 $ processWords $ BLC.words s of (xs, []) -> [mkFirst xs] (xs, ys) -> mkFirst xs : map mkRest (splitN 76 ys) -- | Format a paragraph as haddock identifier non-first comment lines. toCommentCont :: BL.ByteString -> [BL.ByteString] toCommentCont = let mkline = ("-- " <>) . BLC.unwords in map mkline . splitN 76 . processWords . BLC.words ------------------------------------------------------------------------------- -- Registry Data ------------------------------------------------------------------------------- -- | Given the CSV file path, read it into a list of linkrel records. readData :: FilePath -> IO [LinkRelInfo] readData file = do csv <- BL.readFile file case decode HasHeader csv of Left err -> error $ "CSV parsing failed: " ++ err Right lrs -> return $ V.toList lrs ------------------------------------------------------------------------------- -- Template ------------------------------------------------------------------------------- -- | Detect whether a template line is literal text or a variable. parseLine :: BL.ByteString -> TemplateLine parseLine s = fromMaybe (TemplText s) $ do (h, t) <- BLC.uncons s guard $ h == '$' (h', t') <- BLC.uncons t guard $ h' == '{' (i, l) <- BLC.unsnoc t' guard $ l == '}' return $ TemplVar i -- | Interpolate the variable values into a template. render :: (BL.ByteString -> BL.ByteString) -> [TemplateLine] -> BL.ByteString render f = let g (TemplText s) = s `BLC.snoc` '\n' g (TemplVar v) = f v in mconcat . map g -- | Read and parse a template file. readTemplate :: FilePath -> IO [TemplateLine] readTemplate = liftM (map parseLine) . readLines ------------------------------------------------------------------------------- -- Haskell Code Generation ------------------------------------------------------------------------------- -- date ----------------------------------------------------------------------- -- | Registry revision date to insert into haddock comment. varDate :: BL.ByteString varDate = "-- /" <> lastUpdated <> "/\n" -- exports -------------------------------------------------------------------- -- | Given a linkrel name, get a matching Haskell variable name. toSymName :: BL.ByteString -> BL.ByteString toSymName = ("rel" <>) . toCamel -- | Given a linkrel name, format an export source line. formatExport :: BL.ByteString -> BL.ByteString formatExport = (" , " <>) . toSymName -- | Given a list of linkrel names, get the exports text. mkVarExports :: [BL.ByteString] -> BL.ByteString mkVarExports = BLC.unlines . map formatExport -- ctors ---------------------------------------------------------------------- -- | Given a linkrel name, get a matching data constructor name. toCtorName :: BL.ByteString -> BL.ByteString toCtorName = ("Rel" <>) . toCamel -- | Given a linkrel name, format a data constructor source line. formatCtor :: BL.ByteString -> BL.ByteString formatCtor = (" | " <>) . toCtorName -- | Given a list of linkrel names, get the constructors text. mkVarCtors :: [BL.ByteString] -> BL.ByteString mkVarCtors = BLC.unlines . map formatCtor -- hashmap -------------------------------------------------------------------- -- | Given a linkrel name, format a hashmap pair source line. formatPair :: Bool -> BL.ByteString -> BL.ByteString formatPair first name = let c = if first then "[" else "," in " " <> c <> " (\"" <> name <> "\", " <> toCtorName name <> ")" -- | Given a list of linkrel names, get the hashmap text. mkVarHashmap :: [BL.ByteString] -> BL.ByteString mkVarHashmap = BLC.unlines . mapSep (formatPair True) (formatPair False) -- tostr ---------------------------------------------------------------------- -- | Given a linkrel name, format a to-string source line. formatMatch :: BL.ByteString -> BL.ByteString formatMatch name = " " <> toCtorName name <> " -> \"" <> name <> "\"" -- | Given a list of linkrel names, get the tostr text. mkVarTostr :: [BL.ByteString] -> BL.ByteString mkVarTostr = BLC.unlines . map formatMatch -- rels ----------------------------------------------------------------------- -- | Given a linkrel, format a haddock comment. formatRelComment :: LinkRelInfo -> [BL.ByteString] formatRelComment lr = let fmt pref s = if BL.null s then [] else "--" : toCommentCont (pref <> s) in toComment (lrDescription lr) ++ fmt "Reference: " (lrReferences lr) ++ fmt "Note: " (lrNotes lr) -- | Given a linkrel, format relation symbol lines. formatRel :: LinkRelInfo -> [BL.ByteString] formatRel lr = let name = lrName lr sym = toSymName name ctor = toCtorName name in formatRelComment lr ++ [ sym <> " :: LinkRelation" , sym <> " = " <> ctor ] -- | Given a list of linkrels , get the rels text. mkVarRels :: [LinkRelInfo] -> BL.ByteString mkVarRels = BLC.unlines . intercalate [BL.empty] . map formatRel ------------------------------------------------------------------------------- -- Main ------------------------------------------------------------------------------- main :: IO () main = do lrs <- readData csvFile templ <- readTemplate hsTemplateFile let lrNames = map lrName lrs getVar s | s == "date" = varDate | s == "exports" = mkVarExports lrNames | s == "ctors" = mkVarCtors lrNames | s == "hashmap" = mkVarHashmap lrNames | s == "tostr" = mkVarTostr lrNames | s == "rels" = mkVarRels lrs | otherwise = error $ "No value defined for variable " ++ show s BL.writeFile outputFile $ render getVar templ putStrLn "Done."