{- | A generic approach to building and caching file outputs. This is a layer on top of Shake which enables build actions to be written in a "forwards" style. For example: > runPier $ action $ do > contents <- lines <$> readArtifactA (externalFile "result.txt") > let result = "result.tar" > runCommand (output result) > $ foldMap input contents > <> prog "tar" (["-cf", result] ++ map pathIn contents) This approach generally leads to simpler logic than backwards-defined build systems such as make or (normal) Shake, where each step of the build logic must be written as a new build rule. Inputs and outputs of a command must be declared up-front, using the 'input' and 'output' functions respectively. This enables isolated, deterministic build steps which are each run in their own temporary directory. Output files are stored in the location > _pier/artifact/HASH/path/to/file where @HASH@ is a string that uniquely determines the action generating that file. In particular, there is no need to worry about choosing distinct names for outputs of different commands. Note that 'Development.Shake.Forward' has similar motivation to this module, but instead uses @fsatrace@ to detect what files changed after the fact. Unfortunately, that approach is not portable. Additionally, it makes it difficult to isolate steps and make the build more reproducible (for example, to prevent the output of one step being mutated by a later one) since every output file could potentially be an input to every action. Finally, by explicitly declaring outputs we can detect sooner when a command doesn't produce the files that we expect. -} {-# LANGUAGE DeriveAnyClass #-} {-# LANGUAGE TypeOperators #-} module Pier.Core.Artifact ( -- * Rules artifactRules -- * Artifact , Artifact , externalFile , (/>) , replaceArtifactExtension , readArtifact , readArtifactB , doesArtifactExist , matchArtifactGlob , unfreezeArtifacts , callArtifact -- * Creating artifacts , writeArtifact , runCommand , runCommand_ , runCommandStdout , Command , message -- ** Command outputs , Output , output -- ** Command inputs , input , inputs , inputList , shadow , groupFiles -- * Running commands , prog , progA , progTemp , pathIn , withCwd , createDirectoryA ) where import Control.Monad (forM_, when, unless) import Control.Monad.IO.Class import Crypto.Hash.SHA256 import Data.ByteString.Base64 import Data.Semigroup import Data.Set (Set) import Development.Shake import Development.Shake.Classes hiding (hash) import Development.Shake.FilePath import Distribution.Simple.Utils (matchDirFileGlob) import GHC.Generics import System.Directory as Directory import System.Exit (ExitCode(..)) import System.Posix.Files (createSymbolicLink) import System.Process.Internals (translate) import qualified Data.Binary as Binary import qualified Data.ByteString as B import qualified Data.ByteString.Char8 as BC import qualified Data.List as List import qualified Data.Map.Strict as Map import qualified Data.Set as Set import qualified Data.Text as T import qualified Data.Text.Encoding as T import Pier.Core.Directory import Pier.Core.Persistent import Pier.Core.Run import Pier.Orphans () -- | A hermetic build step. Consists of a sequence of calls to 'message', -- 'prog'/'progA'/'progTemp', and/or 'shadow', which may be combined using '<>'/'mappend'. -- Also specifies the input 'Artifacts' that are used by those commands. data Command = Command { _commandProgs :: [Prog] , commandInputs :: Set Artifact } deriving (Typeable, Eq, Generic, Hashable, Binary, NFData) data Call = CallEnv String -- picked up from $PATH | CallArtifact Artifact | CallTemp FilePath -- Local file to this Command -- (e.g. generated by an earlier call) -- (This is a hack around shake which tries to resolve -- local files in the env.) deriving (Typeable, Eq, Generic, Hashable, Binary, NFData) data Prog = ProgCall { _progCall :: Call , _progArgs :: [String] , progCwd :: FilePath -- relative to the root of the sandbox } | Message String | Shadow Artifact FilePath deriving (Typeable, Eq, Generic, Hashable, Binary, NFData) instance Monoid Command where Command ps is `mappend` Command ps' is' = Command (ps ++ ps') (is <> is') mempty = Command [] Set.empty instance Semigroup Command -- | Run an external command-line program with the given arguments. prog :: String -> [String] -> Command prog p as = Command [ProgCall (CallEnv p) as "."] Set.empty -- | Run an artifact as an command-line program with the given arguments. progA :: Artifact -> [String] -> Command progA p as = Command [ProgCall (CallArtifact p) as "."] (Set.singleton p) -- | Run a command-line program with the given arguments, where the program -- was created by a previous program. progTemp :: FilePath -> [String] -> Command progTemp p as = Command [ProgCall (CallTemp p) as "."] Set.empty -- | Prints a status message for the user when this command runs. message :: String -> Command message s = Command [Message s] Set.empty -- | Runs a command within the given (relative) directory. withCwd :: FilePath -> Command -> Command withCwd path (Command ps as) | isAbsolute path = error $ "withCwd: expected relative path, got " ++ show path | otherwise = Command (map setPath ps) as where setPath m@Message{} = m setPath p = p { progCwd = path } -- | Specify that an 'Artifact' should be made available to program calls within this -- 'Command'. -- -- Note that the order does not matter; `input f <> cmd === cmd <> input f`. input :: Artifact -> Command input = inputs . Set.singleton inputList :: [Artifact] -> Command inputList = inputs . Set.fromList -- | Specify that a set of 'Artifact's should be made available to program calls within this -- 'Command'. inputs :: Set Artifact -> Command inputs = Command [] -- | Make a "shadow" copy of the given input artifact's by create a symlink of -- this artifact (if it is a file) or of each sub-file (transitively, if it is -- a directory). -- -- The result may be captured as output, for example when grouping multiple outputs -- of separate commands into a common directory structure. shadow :: Artifact -> FilePath -> Command shadow a f | isAbsolute f = error $ "shadowArtifact: need relative destination, found " ++ show f | otherwise = Command [Shadow a f] Set.empty -- | The output of a given command. -- -- Multiple outputs may be combined using the 'Applicative' instance. data Output a = Output [FilePath] (Hash -> a) instance Functor Output where fmap f (Output g h) = Output g (f . h) instance Applicative Output where pure = Output [] . const Output f g <*> Output f' g' = Output (f ++ f') (g <*> g') -- | Register a single output of a command. -- -- The input must be a relative path and nontrivial (i.e., not @"."@ or @""@). output :: FilePath -> Output Artifact output f | normalise f == "." = error $ "Can't output empty path " ++ show f | isAbsolute f = error $ "Can't output absolute path " ++ show f | otherwise = Output [f] $ flip Artifact (normalise f) . Built -- | Unique identifier of a command newtype Hash = Hash B.ByteString deriving (Show, Eq, Ord, Binary, NFData, Hashable, Generic) makeHash :: Binary a => a -> Hash makeHash = Hash . fixChars . dropPadding . encode . hashlazy . Binary.encode where -- Remove slashes, since the strings will appear in filepaths. -- Also remove `+` to reduce shell errors. fixChars = BC.map $ \case '/' -> '-' '+' -> '.' c -> c -- Padding just adds noise, since we don't have length requirements (and indeed -- every sha256 hash is 32 bytes) dropPadding c | BC.last c == '=' = BC.init c -- Shouldn't happen since each hash is the same length: | otherwise = c hashDir :: Hash -> FilePath hashDir h = artifactDir hashString h artifactDir :: FilePath artifactDir = pierFile "artifact" externalArtifactDir :: FilePath externalArtifactDir = artifactDir "external" hashString :: Hash -> String hashString (Hash h) = BC.unpack h -- | An 'Artifact' is a file or folder that was created by a build command. data Artifact = Artifact Source FilePath deriving (Eq, Ord, Generic, Hashable, Binary, NFData) instance Show Artifact where show (Artifact External f) = "external:" ++ show f show (Artifact (Built h) f) = hashString h ++ ":" ++ show f data Source = Built Hash | External deriving (Show, Eq, Ord, Generic, Hashable, Binary, NFData) -- | Create an 'Artifact' from an input file to the build (for example, a -- source file created by the user). -- -- If it is a relative path, changes to the file will cause rebuilds of -- Commands and Rules that dependended on it. externalFile :: FilePath -> Artifact externalFile f | null f' = error "externalFile: empty input" | artifactDir `List.isPrefixOf` f' = error $ "externalFile: forbidden prefix: " ++ show f' | otherwise = Artifact External f' where f' = normalise f -- | Create a reference to a sub-file of the given 'Artifact', which must -- refer to a directory. (/>) :: Artifact -> FilePath -> Artifact Artifact source f /> g = Artifact source $ normalise $ f g infixr 5 /> -- Same as artifactRules :: HandleTemps -> Rules () artifactRules ht = do liftIO createExternalLink commandRules ht writeArtifactRules createExternalLink :: IO () createExternalLink = do exists <- doesPathExist externalArtifactDir unless exists $ do createParentIfMissing externalArtifactDir createSymbolicLink "../.." externalArtifactDir -- | The build rule type for commands. data CommandQ = CommandQ { commandQCmd :: Command , _commandQOutputs :: [FilePath] } deriving (Eq, Generic) instance Show CommandQ where show CommandQ { commandQCmd = Command progs _ } = let msgs = List.intercalate "; " [m | Message m <- progs] in "Command" ++ if null msgs then "" else ": " ++ msgs instance Hashable CommandQ instance Binary CommandQ instance NFData CommandQ type instance RuleResult CommandQ = Hash -- TODO: sanity-check filepaths; for example, normalize, should be relative, no -- "..", etc. commandHash :: CommandQ -> Action Hash commandHash cmdQ = do let externalFiles = [f | Artifact External f <- Set.toList $ commandInputs $ commandQCmd cmdQ , isRelative f ] need externalFiles -- TODO: streaming hash userFileHashes <- liftIO $ map hash <$> mapM B.readFile externalFiles return $ makeHash ("commandHash", cmdQ, userFileHashes) -- | Run the given command, capturing the specified outputs. runCommand :: Output t -> Command -> Action t runCommand (Output outs mk) c = mk <$> askPersistent (CommandQ c outs) -- Run the given command and record its stdout. runCommandStdout :: Command -> Action String runCommandStdout c = do out <- runCommand (output stdoutOutput) c liftIO $ readFile $ pathIn out -- | Run the given command without capturing its output. Can be used to check -- consistency of the outputs of previous commands. runCommand_ :: Command -> Action () runCommand_ = runCommand (pure ()) commandRules :: HandleTemps -> Rules () commandRules ht = addPersistent $ \cmdQ@(CommandQ (Command progs inps) outs) -> do putChatty $ showCommand cmdQ h <- commandHash cmdQ createArtifacts h $ \resultDir -> -- Run the command within a separate temporary directory. -- When it's done, we'll move the explicit set of outputs into -- the result location. withPierTempDirectoryAction ht (hashString h) $ \tmpDir -> do let tmpPathOut = (tmpDir ) liftIO $ collectInputs inps tmpDir mapM_ (createParentIfMissing . tmpPathOut) outs -- Run the command, and write its stdout to a special file. root <- liftIO getCurrentDirectory stdoutStr <- B.concat <$> mapM (readProg (root tmpDir)) progs let stdoutPath = tmpPathOut stdoutOutput createParentIfMissing stdoutPath liftIO $ B.writeFile stdoutPath stdoutStr -- Check that all the output files exist, and move them -- into the output directory. liftIO $ forM_ outs $ \f -> do let src = tmpPathOut f let dest = resultDir f exist <- Directory.doesPathExist src unless exist $ error $ "runCommand: missing output " ++ show f ++ " in temporary directory " ++ show tmpDir createParentIfMissing dest renamePath src dest return h putChatty :: String -> Action () putChatty s = do v <- shakeVerbosity <$> getShakeOptions when (v >= Chatty) $ putNormal s -- TODO: more hermetic? collectInputs :: Set Artifact -> FilePath -> IO () collectInputs inps tmp = do let inps' = dedupArtifacts inps checkAllDistinctPaths inps' liftIO $ mapM_ (linkArtifact tmp) inps' -- | Create a directory containing Artifacts. -- -- If the output directory already exists, don't do anything. Otherwise, run -- the given function with a temporary directory, and then move that directory -- atomically to the final output directory for those Artifacts. -- Files and (sub)directories, as well as the directory itself, will -- be made read-only. createArtifacts :: Hash -> (FilePath -> Action ()) -> Action () createArtifacts h act = do let destDir = hashDir h -- Skip if the output directory already exists; we'll produce it atomically -- below. This could happen if Shake's database was cleaned, or if the -- action stops before Shake registers it as complete, due to either a -- synchronous or asynchronous exception. exists <- liftIO $ Directory.doesDirectoryExist destDir unless exists $ do tempDir <- createPierTempDirectory $ hashString h ++ "-result" -- Run the given action. act tempDir liftIO $ do -- Move the created directory to its final location, -- with all the files and directories inside set to -- read-only. getRegularContents tempDir >>= mapM_ (forFileRecursive_ freezePath . (tempDir )) createParentIfMissing destDir Directory.renameDirectory tempDir destDir -- Also set the directory itself to read-only, but wait -- until the last step since read-only files can't be moved. freezePath destDir -- Call a process inside the given directory and capture its stdout. -- TODO: more flexibility around the env vars -- Also: limit valid parameters for the *prog* binary (rather than taking it -- from the PATH that the `pier` executable sees). readProg :: FilePath -> Prog -> Action B.ByteString readProg _ (Message s) = do putNormal s return B.empty readProg dir (ProgCall p as cwd) = readProgCall dir p as cwd readProg dir (Shadow a0 f0) = do liftIO $ linkShadow dir a0 f0 return B.empty readProgCall :: FilePath -> Call -> [String] -> FilePath -> Action BC.ByteString readProgCall dir p as cwd = do -- hack around shake weirdness w.r.t. relative binary paths let p' = case p of CallEnv s -> s CallArtifact f -> dir pathIn f CallTemp f -> dir f (ret, Stdout out, Stderr err) <- quietly $ command [ Cwd $ dir cwd , Env defaultEnv -- stderr will get printed if there's an error. , EchoStderr False ] p' (map (spliceTempDir dir) as) case ret of ExitSuccess -> return out ExitFailure ec -> do v <- shakeVerbosity <$> getShakeOptions fail $ if v < Loud -- TODO: remove trailing newline then err else unlines [ showProg (ProgCall p as cwd) , "Working dir: " ++ translate (dir cwd) , "Exit code: " ++ show ec , "Stderr:" , err ] -- TODO: check the destination files actually exist. linkShadow :: FilePath -> Artifact -> FilePath -> IO () linkShadow dir a0 f0 = do let out = dir f0 createParentIfMissing out rootDir <- Directory.getCurrentDirectory deepLink (rootDir pathIn a0) out where deepLink a f = do isDir <- Directory.doesDirectoryExist a if isDir then do Directory.createDirectoryIfMissing False f cs <- getRegularContents a mapM_ (\c -> deepLink (a c) (f c)) cs else do srcExists <- Directory.doesFileExist a destExists <- Directory.doesPathExist f if | not srcExists -> error $ "linkShadow: missing source " ++ show a | destExists -> error $ "linkShadow: destination already exists: " ++ show f | otherwise -> createSymbolicLink a f showProg :: Prog -> String showProg (Shadow a f) = unwords ["Shadow:", pathIn a, "=>", f] showProg (Message m) = "Message: " ++ show m showProg (ProgCall call args cwd) = wrapCwd . List.intercalate " \\\n " $ showCall call : args where wrapCwd s = case cwd of "." -> s _ -> "(cd " ++ translate cwd ++ " &&\n " ++ s ++ ")" showCall (CallArtifact a) = pathIn a showCall (CallEnv f) = f showCall (CallTemp f) = f -- TODO: differentiate from CallEnv showCommand :: CommandQ -> String showCommand (CommandQ (Command progs inps) outputs) = unlines $ map showOutput outputs ++ map showInput (Set.toList inps) ++ map showProg progs where showInput i = "Input: " ++ pathIn i showOutput a = "Output: " ++ a stdoutOutput :: FilePath stdoutOutput = "_stdout" defaultEnv :: [(String, String)] defaultEnv = [("PATH", "/usr/bin:/bin")] spliceTempDir :: FilePath -> String -> String spliceTempDir tmp = T.unpack . T.replace (T.pack "${TMPDIR}") (T.pack tmp) . T.pack checkAllDistinctPaths :: Monad m => [Artifact] -> m () checkAllDistinctPaths as = case Map.keys . Map.filter (> 1) . Map.fromListWith (+) . map (\a -> (pathIn a, 1 :: Integer)) $ as of [] -> return () -- TODO: nicer error, telling where they came from: fs -> error $ "Artifacts generated from more than one command: " ++ show fs -- Remove duplicate artifacts that are both outputs of the same command, and where -- one is a subdirectory of the other (for example, constructed via `/>`). dedupArtifacts :: Set Artifact -> [Artifact] dedupArtifacts = loop . Set.toAscList where -- Loop over artifacts built from the same command. -- toAscList plus lexicographic sorting means that -- subdirectories with the same hash will appear consecutively after directories -- that contain them. loop (a@(Artifact (Built h) f) : Artifact (Built h') f' : fs) -- TODO BUG: "Picture", "Picture.hs" and Picture/Foo.hs" sort in the wrong way -- so "Picture" and "Picture/Foo.hs" aren't deduped. | h == h', (f "*") ?== f' = loop (a:fs) loop (f:fs) = f : loop fs loop [] = [] freezePath :: FilePath -> IO () freezePath f = getPermissions f >>= setPermissions f . setOwnerWritable False -- | Make all artifacts user-writable, so they can be deleted by `clean-all`. unfreezeArtifacts :: IO () unfreezeArtifacts = do exists <- Directory.doesDirectoryExist artifactDir when exists $ forFileRecursive_ unfreeze artifactDir where unfreeze f = do sym <- pathIsSymbolicLink f unless sym $ getPermissions f >>= setPermissions f . setOwnerWritable True -- TODO: don't loop on symlinks, and be more efficient? forFileRecursive_ :: (FilePath -> IO ()) -> FilePath -> IO () forFileRecursive_ act f = do isSymLink <- pathIsSymbolicLink f unless isSymLink $ do isDir <- Directory.doesDirectoryExist f if not isDir then act f else do getRegularContents f >>= mapM_ (forFileRecursive_ act . (f )) act f getRegularContents :: FilePath -> IO [FilePath] getRegularContents f = filter (not . specialFile) <$> Directory.getDirectoryContents f where specialFile "." = True specialFile ".." = True specialFile _ = False -- Symlink the artifact into the given destination directory. linkArtifact :: FilePath -> Artifact -> IO () linkArtifact _ (Artifact External f) | isAbsolute f = return () linkArtifact dir a = do curDir <- getCurrentDirectory let realPath = curDir realPathIn a let localPath = dir pathIn a checkExists realPath createParentIfMissing localPath createSymbolicLink realPath localPath where -- Sanity check checkExists f = do isFile <- Directory.doesFileExist f isDir <- Directory.doesDirectoryExist f when (not isFile && not isDir) $ error $ "linkArtifact: source does not exist: " ++ show f ++ " for artifact " ++ show a -- | Returns the relative path to an Artifact within the sandbox, when provided -- to a 'Command' by 'input'. pathIn :: Artifact -> FilePath pathIn (Artifact External f) = externalArtifactDir f pathIn (Artifact (Built h) f) = hashDir h f -- | Returns the relative path to an artifact within the root directory. realPathIn :: Artifact -> FilePath realPathIn (Artifact External f) = f realPathIn (Artifact (Built h) f) = hashDir h f -- | Replace the extension of an Artifact. In particular, -- -- > pathIn (replaceArtifactExtension f ext) == replaceExtension (pathIn f) ext@ replaceArtifactExtension :: Artifact -> String -> Artifact replaceArtifactExtension (Artifact s f) ext = Artifact s $ replaceExtension f ext -- | Read the contents of an Artifact. readArtifact :: Artifact -> Action String readArtifact (Artifact External f) = readFile' f -- includes need readArtifact f = liftIO $ readFile $ pathIn f readArtifactB :: Artifact -> Action B.ByteString readArtifactB (Artifact External f) = need [f] >> liftIO (B.readFile f) readArtifactB f = liftIO $ B.readFile $ pathIn f data WriteArtifactQ = WriteArtifactQ { writePath :: FilePath , writeContents :: String } deriving (Eq, Typeable, Generic, Hashable, Binary, NFData) instance Show WriteArtifactQ where show w = "Write " ++ writePath w type instance RuleResult WriteArtifactQ = Artifact writeArtifact :: FilePath -> String -> Action Artifact writeArtifact path contents = askPersistent $ WriteArtifactQ path contents writeArtifactRules :: Rules () writeArtifactRules = addPersistent $ \WriteArtifactQ {writePath = path, writeContents = contents} -> do let h = makeHash . T.encodeUtf8 . T.pack $ "writeArtifact: " ++ contents createArtifacts h $ \tmpDir -> do let out = tmpDir path createParentIfMissing out liftIO $ writeFile out contents return $ Artifact (Built h) $ normalise path doesArtifactExist :: Artifact -> Action Bool doesArtifactExist (Artifact External f) = Development.Shake.doesFileExist f doesArtifactExist f = liftIO $ Directory.doesFileExist (pathIn f) -- Note: this throws an exception if there's no match. matchArtifactGlob :: Artifact -> FilePath -> Action [FilePath] -- TODO: match the behavior of Cabal matchArtifactGlob (Artifact External f) g = getDirectoryFiles f [g] matchArtifactGlob a g = liftIO $ matchDirFileGlob (pathIn a) g -- TODO: merge more with above code? How hermetic should it be? callArtifact :: HandleTemps -> Set Artifact -> Artifact -> [String] -> IO () callArtifact ht inps bin args = withPierTempDirectory ht "exec" $ \tmp -> do dir <- getCurrentDirectory collectInputs (Set.insert bin inps) tmp cmd_ [Cwd tmp] (dir tmp pathIn bin) args createDirectoryA :: FilePath -> Command createDirectoryA f = prog "mkdir" ["-p", f] -- | Group source files by shadowing into a single directory. groupFiles :: Artifact -> [(FilePath, FilePath)] -> Action Artifact groupFiles dir files = let out = "group" in runCommand (output out) $ createDirectoryA out <> foldMap (\(f, g) -> shadow (dir /> f) (out g)) files