-- |
-- Module      : Data.Array.Accelerate.LLVM.PTX.Analysis.Device
-- Copyright   : [2008..2017] Manuel M T Chakravarty, Gabriele Keller
--               [2009..2017] Trevor L. McDonell
-- License     : BSD3
--
-- Maintainer  : Trevor L. McDonell <tmcdonell@cse.unsw.edu.au>
-- Stability   : experimental
-- Portability : non-portable (GHC extensions)
--

module Data.Array.Accelerate.LLVM.PTX.Analysis.Device
  where

import Data.Ord
import Data.List
import Data.Function

import Foreign.CUDA.Driver.Device
import Foreign.CUDA.Analysis.Device
import qualified Foreign.CUDA.Driver                    as CUDA


-- | Select the best of the available CUDA capable devices. This prefers devices
-- with higher compute capability, followed by maximum throughput. This does not
-- take into account any other factors, such as whether the device is currently
-- in use by another process.
--
-- Ignore the possibility of emulation-mode devices, as this has been deprecated
-- as of CUDA v3.0 (compute-capability == 9999.9999)
--
-- Every device index in the range @[0 .. count-1]@ is queried for its handle
-- and properties. If that range is empty, an 'IOError' is raised immediately
-- from within the 'IO' action, rather than returning a lazily-failing result.
--
selectBestDevice :: IO (Device, DeviceProperties)
selectBestDevice = do
  n     <- CUDA.count
  devs  <- mapM CUDA.device [0 .. n - 1]
  prps  <- mapM CUDA.props devs
  case zip devs prps of
    -- 'minimumBy' is partial; report the empty case eagerly and descriptively
    -- instead of leaving an exception hidden inside the returned value.
    []         -> ioError (userError "selectBestDevice: no CUDA capable devices found")
    -- Taking the minimum under the flipped ordering selects the highest-ranked
    -- device. This is deliberately not rewritten as 'maximumBy', so that the
    -- choice among equally-ranked devices stays exactly as before.
    candidates -> return $ minimumBy (flip cmp `on` snd) candidates
  where
    compute     = computeCapability

    -- Throughput estimate: multiprocessors * cores per multiprocessor * clock rate
    flops d     = multiProcessorCount d * coresPerMultiProcessor d * clockRate d

    -- Order primarily by compute capability; fall back to the throughput
    -- estimate only when the compute capabilities are equal.
    cmp x y
      | compute x == compute y  = comparing flops   x y
      | otherwise               = comparing compute x y


-- | Number of CUDA cores per streaming multiprocessor for a given architecture
-- revision. This is the number of SIMD arithmetic units per multiprocessor,
-- executing in lockstep in half-warp groupings (16 ALUs).
--
-- The value is read from the 'coresPerMP' field of the 'deviceResources'
-- looked up for the given device properties.
--
coresPerMultiProcessor :: DeviceProperties -> Int
coresPerMultiProcessor = coresPerMP . deviceResources