Copyright	[2009..2023] Trevor L. McDonell
License	BSD
Safe Haskell	Safe-Inferred
Language	Haskell98

Foreign.CUDA.Runtime.Exec

Contents

Kernel Execution

Description

Kernel execution control for the C-for-CUDA runtime interface.

Synopsis

type Fun = FunPtr ()
data FunAttributes = FunAttributes {
- constSizeBytes :: !Int64
- localSizeBytes :: !Int64
- sharedSizeBytes :: !Int64
- maxKernelThreadsPerBlock :: !Int
- numRegs :: !Int
}
data FunParam where
- IArg :: !Int -> FunParam
- FArg :: !Float -> FunParam
- DArg :: !Double -> FunParam
- VArg :: Storable a => !a -> FunParam
data CacheConfig
- = None
- | Shared
- | L1
- | Equal
attributes :: Fun -> IO FunAttributes
setCacheConfig :: Fun -> CacheConfig -> IO ()
launchKernel :: Fun -> (Int, Int) -> (Int, Int, Int) -> Int64 -> Maybe Stream -> [FunParam] -> IO ()

Kernel Execution

type Fun = FunPtr () Source #

A global device function.

Note that the use of a string naming a function was deprecated in CUDA 4.1 and removed in CUDA 5.0.

data FunAttributes Source #

Constructors

FunAttributes
Fields constSizeBytes :: !Int64 localSizeBytes :: !Int64 sharedSizeBytes :: !Int64 maxKernelThreadsPerBlock :: !Int maximum block size that can be successfully launched (based on register usage) numRegs :: !Int number of registers required for each thread

Instances

Instances details

Storable FunAttributes Source #
Instance details Defined in Foreign.CUDA.Runtime.Exec Methods sizeOf :: FunAttributes -> Int # alignment :: FunAttributes -> Int # peekElemOff :: Ptr FunAttributes -> Int -> IO FunAttributes # pokeElemOff :: Ptr FunAttributes -> Int -> FunAttributes -> IO () # peekByteOff :: Ptr b -> Int -> IO FunAttributes # pokeByteOff :: Ptr b -> Int -> FunAttributes -> IO () # peek :: Ptr FunAttributes -> IO FunAttributes # poke :: Ptr FunAttributes -> FunAttributes -> IO () #
Show FunAttributes Source #
Instance details Defined in Foreign.CUDA.Runtime.Exec Methods showsPrec :: Int -> FunAttributes -> ShowS # show :: FunAttributes -> String # showList :: [FunAttributes] -> ShowS #

data FunParam where Source #

Kernel function parameters. Doubles will be converted to an internal float representation on devices that do not support doubles natively.

Constructors

IArg :: !Int -> FunParam
FArg :: !Float -> FunParam
DArg :: !Double -> FunParam
VArg :: Storable a => !a -> FunParam

data CacheConfig Source #

Cache configuration preference

Constructors

None
Shared
L1
Equal

Instances

Instances details

Enum CacheConfig Source #
Instance details Defined in Foreign.CUDA.Runtime.Exec Methods succ :: CacheConfig -> CacheConfig # pred :: CacheConfig -> CacheConfig # toEnum :: Int -> CacheConfig # fromEnum :: CacheConfig -> Int # enumFrom :: CacheConfig -> [CacheConfig] # enumFromThen :: CacheConfig -> CacheConfig -> [CacheConfig] # enumFromTo :: CacheConfig -> CacheConfig -> [CacheConfig] # enumFromThenTo :: CacheConfig -> CacheConfig -> CacheConfig -> [CacheConfig] #
Show CacheConfig Source #
Instance details Defined in Foreign.CUDA.Runtime.Exec Methods showsPrec :: Int -> CacheConfig -> ShowS # show :: CacheConfig -> String # showList :: [CacheConfig] -> ShowS #
Eq CacheConfig Source #
Instance details Defined in Foreign.CUDA.Runtime.Exec Methods (==) :: CacheConfig -> CacheConfig -> Bool # (/=) :: CacheConfig -> CacheConfig -> Bool #

attributes :: Fun -> IO FunAttributes Source #

Obtain the attributes of the named global device function. This itemises the requirements to successfully launch the given kernel.

setCacheConfig :: Fun -> CacheConfig -> IO () Source #

On devices where the L1 cache and shared memory use the same hardware resources, this sets the preferred cache configuration for the given device function. This is only a preference; the driver is free to choose a different configuration as required to execute the function.

Switching between configuration modes may insert a device-side synchronisation point for streamed kernel launches.

launchKernel Source #

Arguments

:: Fun	Device function symbol
-> (Int, Int)	grid dimensions
-> (Int, Int, Int)	thread block shape
-> Int64	shared memory per block (bytes)
-> Maybe Stream	(optional) execution stream
-> [FunParam]
-> IO ()

Invoke a kernel on a (gx * gy) grid of blocks, where each block contains (tx * ty * tz) threads and has access to a given number of bytes of shared memory. The launch may also be associated with a specific Stream.