{-# LINE 1 "src/Foreign/CUDA/Analysis/Device.chs" #-}
module Foreign.CUDA.Analysis.Device (
Compute(..), ComputeMode(..),
DeviceProperties(..), DeviceResources(..), Allocation(..), PCI(..),
deviceResources,
describe
) where
import Data.Int
import Text.Show.Describe
import Debug.Trace
-- | The mode a device is operating in, which governs how contexts may be
-- created on it.
data ComputeMode
  = Default           -- ^ multiple contexts may share the device simultaneously
  | Prohibited        -- ^ no contexts can be created on the device
  | ExclusiveProcess  -- ^ a single context, used by a single process, at a time
  deriving (Eq, Show)

-- | Enumeration with explicit, non-contiguous numeric codes: 'Default' is 0,
-- 'Prohibited' is 2 and 'ExclusiveProcess' is 3. No constructor maps to 1, so
-- @toEnum 1@ (like any other unknown code) raises an error.
--
-- 'succ', 'pred', 'enumFrom' and 'enumFromTo' step through the constructors in
-- declaration order rather than through the numeric codes.
instance Enum ComputeMode where
  succ mode = case mode of
    Default          -> Prohibited
    Prohibited       -> ExclusiveProcess
    ExclusiveProcess -> error "ComputeMode.succ: ExclusiveProcess has no successor"

  pred mode = case mode of
    ExclusiveProcess -> Prohibited
    Prohibited       -> Default
    Default          -> error "ComputeMode.pred: Default has no predecessor"

  -- Walk forward with 'succ' until the numeric code of the upper bound is
  -- reached; a lower bound that already lies past it yields the empty list.
  enumFromTo lo hi = walk lo
    where
      limit = fromEnum hi
      walk cur
        | code <  limit = cur : walk (succ cur)
        | code == limit = [cur]
        | otherwise     = []
        where
          code = fromEnum cur

  enumFrom lo = enumFromTo lo ExclusiveProcess

  fromEnum mode = case mode of
    Default          -> 0
    Prohibited       -> 2
    ExclusiveProcess -> 3

  toEnum code = case code of
    0 -> Default
    2 -> Prohibited
    3 -> ExclusiveProcess
    _ -> error ("ComputeMode.toEnum: Cannot match " ++ show code)
{-# LINE 33 "src/Foreign/CUDA/Analysis/Device.chs" #-}
-- | Human-readable explanation of what each 'ComputeMode' permits.
instance Describe ComputeMode where
  describe mode = case mode of
    Default          -> "Multiple contexts are allowed on the device simultaneously"
    Prohibited       -> "No contexts can be created on this device at this time"
    ExclusiveProcess -> "Only one context used by a single process can be present on this device at a time"
-- | A compute capability, stored as a strict major and minor revision number
-- (in that order).
data Compute = Compute !Int !Int
  deriving (Eq)

-- | Rendered as @major.minor@, e.g. @Compute 3 5@ shows as @3.5@.
instance Show Compute where
  show (Compute major minor) = shows major ('.' : show minor)

-- | Lexicographic ordering: the major number decides, and the minor number
-- only breaks ties.
instance Ord Compute where
  compare (Compute major1 minor1) (Compute major2 minor2) =
    compare (major1, minor1) (major2, minor2)
-- | Record of the properties of a single device. Every field is strict.
--
-- NOTE(review): this module does not state the units of the size and clock
-- fields (bytes, kilohertz, ...); confirm them against the code that
-- populates this record before relying on them.
data DeviceProperties = DeviceProperties
  { deviceName                  :: !String
  , computeCapability           :: !Compute           -- ^ key used by 'deviceResources' to select a resource table
  , totalGlobalMem              :: !Int64
  , totalConstMem               :: !Int64
  , sharedMemPerBlock           :: !Int64
  , regsPerBlock                :: !Int
  , warpSize                    :: !Int
  , maxThreadsPerBlock          :: !Int
  , maxThreadsPerMultiProcessor :: !Int
  , maxBlockSize                :: !(Int, Int, Int)
  , maxGridSize                 :: !(Int, Int, Int)
  , maxTextureDim1D             :: !Int
  , maxTextureDim2D             :: !(Int, Int)
  , maxTextureDim3D             :: !(Int, Int, Int)
  , clockRate                   :: !Int
  , multiProcessorCount         :: !Int
  , memPitch                    :: !Int64
  , memBusWidth                 :: !Int
  , memClockRate                :: !Int
  , textureAlignment            :: !Int64
  , computeMode                 :: !ComputeMode       -- ^ see 'ComputeMode'
  , deviceOverlap               :: !Bool
  , concurrentKernels           :: !Bool
  , eccEnabled                  :: !Bool
  , asyncEngineCount            :: !Int
  , cacheMemL2                  :: !Int
  , pciInfo                     :: !PCI               -- ^ see 'PCI'
  , tccDriverEnabled            :: !Bool
  , kernelExecTimeoutEnabled    :: !Bool
  , integrated                  :: !Bool
  , canMapHostMemory            :: !Bool
  , unifiedAddressing           :: !Bool
  , streamPriorities            :: !Bool
  , globalL1Cache               :: !Bool
  , localL1Cache                :: !Bool
  , managedMemory               :: !Bool
  , multiGPUBoard               :: !Bool
  , multiGPUBoardGroupID        :: !Int
  }
  deriving Show
-- | PCI identifiers of a device: three strict integers for the bus, the
-- device and the domain. Stored in the 'pciInfo' field of 'DeviceProperties'.
data PCI = PCI
  { busID    :: !Int
  , deviceID :: !Int
  , domainID :: !Int
  }
  deriving Show
-- | Tag stored in the 'regAllocationStyle' field of 'DeviceResources'.
--
-- NOTE(review): presumably the granularity at which registers are handed
-- out (per warp or per thread block) -- confirm against the consumers of
-- 'regAllocationStyle'.
data Allocation
  = Warp
  | Block
-- | Fixed resource figures associated with a compute capability, as produced
-- by 'deviceResources'. Every field is strict.
--
-- NOTE(review): the @MP@ suffix presumably stands for "per multiprocessor",
-- and the memory figures are presumably in bytes -- neither is stated in this
-- module, so confirm before relying on it.
data DeviceResources = DeviceResources
  { threadsPerWarp       :: !Int
  , coresPerMP           :: !Int
  , warpsPerMP           :: !Int
  , threadsPerMP         :: !Int
  , threadBlocksPerMP    :: !Int
  , sharedMemPerMP       :: !Int
  , maxSharedMemPerBlock :: !Int
  , regFileSizePerMP     :: !Int
  , maxRegPerBlock       :: !Int
  , regAllocUnit         :: !Int
  , regAllocationStyle   :: !Allocation  -- ^ see 'Allocation'
  , maxRegPerThread      :: !Int
  , sharedMemAllocUnit   :: !Int
  , warpAllocUnit        :: !Int
  , warpRegAllocUnit     :: !Int
  }
-- | Select the hard-coded resource table that matches a device's
-- 'computeCapability'.
--
-- Tables exist for capabilities 1.0-1.3, 2.0, 2.1, 3.0, 3.2, 3.5, 3.7, 5.0,
-- 5.2, 5.3, 6.0, 6.1 and 6.2. Several entries are defined as another entry
-- with a handful of fields overridden, so a change to a base table also
-- changes everything derived from it.
--
-- Any other capability falls back to the 3.0 table and emits a warning via
-- 'trace' asking the user to file a bug report.
deviceResources :: DeviceProperties -> DeviceResources
deviceResources props = resources (computeCapability props)
  where
    resources :: Compute -> DeviceResources

    -- Compute 1.x -----------------------------------------------------------
    resources (Compute 1 0) = resources (Compute 1 1)
    resources (Compute 1 1) = DeviceResources
      { threadsPerWarp       = 32
      , coresPerMP           = 8
      , warpsPerMP           = 24
      , threadsPerMP         = 768
      , threadBlocksPerMP    = 8
      , sharedMemPerMP       = 16384
      , maxSharedMemPerBlock = 16384
      , regFileSizePerMP     = 8192
      , maxRegPerBlock       = 8192
      , regAllocUnit         = 256
      , regAllocationStyle   = Block
      , maxRegPerThread      = 124
      , sharedMemAllocUnit   = 512
      , warpAllocUnit        = 2
      , warpRegAllocUnit     = 256
      }
    resources (Compute 1 2) = resources (Compute 1 3)
    resources (Compute 1 3) = (resources (Compute 1 1))
      { threadsPerMP     = 1024
      , warpsPerMP       = 32
      , regFileSizePerMP = 16384
      , maxRegPerBlock   = 16384
      , regAllocUnit     = 512
      }

    -- Compute 2.x -----------------------------------------------------------
    resources (Compute 2 0) = DeviceResources
      { threadsPerWarp       = 32
      , coresPerMP           = 32
      , warpsPerMP           = 48
      , threadsPerMP         = 1536
      , threadBlocksPerMP    = 8
      , sharedMemPerMP       = 49152
      , maxSharedMemPerBlock = 49152
      , regFileSizePerMP     = 32768
      , maxRegPerBlock       = 32768
      , regAllocUnit         = 64
      , regAllocationStyle   = Warp
      , maxRegPerThread      = 63
      , sharedMemAllocUnit   = 128
      , warpAllocUnit        = 2
      , warpRegAllocUnit     = 64
      }
    resources (Compute 2 1) = (resources (Compute 2 0))
      { coresPerMP = 48
      }

    -- Compute 3.x -----------------------------------------------------------
    resources (Compute 3 0) = DeviceResources
      { threadsPerWarp       = 32
      , coresPerMP           = 192
      , warpsPerMP           = 64
      , threadsPerMP         = 2048
      , threadBlocksPerMP    = 16
      , sharedMemPerMP       = 49152
      , maxSharedMemPerBlock = 49152
      , regFileSizePerMP     = 65536
      , maxRegPerBlock       = 65536
      , regAllocUnit         = 256
      , regAllocationStyle   = Warp
      , maxRegPerThread      = 63
      , sharedMemAllocUnit   = 256
      , warpAllocUnit        = 4
      , warpRegAllocUnit     = 256
      }
    resources (Compute 3 2) = resources (Compute 3 5)
    resources (Compute 3 5) = (resources (Compute 3 0))
      { maxRegPerThread = 255
      }
    resources (Compute 3 7) = (resources (Compute 3 5))
      { sharedMemPerMP   = 114688
      , regFileSizePerMP = 131072
      }

    -- Compute 5.x -----------------------------------------------------------
    resources (Compute 5 0) = DeviceResources
      { threadsPerWarp       = 32
      , coresPerMP           = 128
      , warpsPerMP           = 64
      , threadsPerMP         = 2048
      , threadBlocksPerMP    = 32
      , sharedMemPerMP       = 65536
      , maxSharedMemPerBlock = 49152
      , regFileSizePerMP     = 65536
      , maxRegPerBlock       = 65536
      , regAllocUnit         = 256
      , regAllocationStyle   = Warp
      , maxRegPerThread      = 255
      , sharedMemAllocUnit   = 256
      , warpAllocUnit        = 4
      , warpRegAllocUnit     = 256
      }
    resources (Compute 5 2) = (resources (Compute 5 0))
      { sharedMemPerMP = 98304
      , maxRegPerBlock = 32768
      , warpAllocUnit  = 2
      }
    resources (Compute 5 3) = (resources (Compute 5 0))
      { maxRegPerBlock = 32768
      , warpAllocUnit  = 2
      }

    -- Compute 6.x -----------------------------------------------------------
    resources (Compute 6 0) = DeviceResources
      { threadsPerWarp       = 32
      , coresPerMP           = 64
      , warpsPerMP           = 64
      , threadsPerMP         = 2048
      , threadBlocksPerMP    = 32
      , sharedMemPerMP       = 65536
      , maxSharedMemPerBlock = 49152
      , regFileSizePerMP     = 65536
      , maxRegPerBlock       = 65536
      , regAllocUnit         = 256
      , regAllocationStyle   = Warp
      , maxRegPerThread      = 255
      , sharedMemAllocUnit   = 256
      , warpAllocUnit        = 2
      , warpRegAllocUnit     = 256
      }
    resources (Compute 6 1) = (resources (Compute 6 0))
      { coresPerMP     = 128
      , sharedMemPerMP = 98304
      , warpAllocUnit  = 4
      }
    -- NOTE(review): threadBlocksPerMP = 4096 is far above every other entry
    -- in this table (8 to 32), while threadsPerMP keeps the inherited 2048
    -- even though warpsPerMP * threadsPerWarp = 4096. Check whether the 4096
    -- was meant for threadsPerMP. Values are kept exactly as found.
    resources (Compute 6 2) = (resources (Compute 6 0))
      { coresPerMP        = 128
      , warpsPerMP        = 128
      , threadBlocksPerMP = 4096
      , warpAllocUnit     = 4
      }

    -- Anything else: warn, then behave like a 3.0 device.
    resources unknown = trace warning (resources (Compute 3 0))
      where
        warning = unlines
          [ "*** Warning: Unknown CUDA device compute capability: " ++ show unknown
          , "*** Please submit a bug report at https://github.com/tmcdonell/cuda/issues"
          ]